Real-time emotion detection in deep learning

Real-time emotion detection is a fascinating technology that enables us to analyze videos or even our faces in live webcam feeds and predict the emotions within the frames! In this Answer, we'll explore how to implement real-time emotion detection and experiment with it.

By an emotion detection application, we mean an application that can intelligently recognize the primary emotion on a person's face by observing their facial expressions.

Note: To understand how we detect emotions in single images, read this Answer first.

Emotion detection

Emotion detection in videos

Similar to how emotions are detected in a single image, we can capture each frame of a continuous video and apply our emotion detection techniques to it. This lets us track how emotions change throughout the video instead of reporting just a single emotion, as sketched in the loop below.

Mapping of emotions for each frame
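
Conceptually, the pipeline boils down to a simple loop: grab a frame, run the detector, repeat. Here is a minimal sketch of that idea, where detect_emotion is a hypothetical placeholder for the model-based prediction we build later in this Answer.

import cv2

def run_on_video(source, detect_emotion):
    # 'source' can be a file path, a URL, or a camera index;
    # 'detect_emotion' is a hypothetical callback that maps a frame to a label.
    capture = cv2.VideoCapture(source)
    while True:
        success, frame = capture.read()
        if not success:  # stop when the video ends or the stream drops
            break
        print(detect_emotion(frame))  # one prediction per frame
    capture.release()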

Technologies used

As in our emotion detection application for images, our code uses various Python libraries, the most prominent being Keras and OpenCV.

OpenCV and Keras logo

Training the model

Before writing the main code, we set up our model using training and testing datasets. Below is the code we used to build and train the model. We also save the trained model so that we don't have to retrain it each time.

import os
import cv2
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam
from keras.utils import np_utils

emotions = ['angry', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised']
num_features = 64
num_labels = len(emotions)
batch_size = 64
epochs = 50
width, height = 48, 48

# Load the grayscale face images from each emotion folder and record their labels
def load_images_from_folder(folder_path):
    images = []
    labels = []
    for emotion_idx, emotion in enumerate(emotions):
        emotion_folder = os.path.join(folder_path, emotion)
        for filename in os.listdir(emotion_folder):
            img = cv2.imread(os.path.join(emotion_folder, filename), cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img = cv2.resize(img, (width, height))
                images.append(img)
                labels.append(emotion_idx)
    return np.array(images), np.array(labels)

train_images, train_labels = load_images_from_folder("train")
test_images, test_labels = load_images_from_folder("test")

# Reshape to (samples, width, height, 1) and scale pixel values to [0, 1]
train_images = train_images.reshape(train_images.shape[0], width, height, 1).astype('float32')
test_images = test_images.reshape(test_images.shape[0], width, height, 1).astype('float32')
train_images /= 255
test_images /= 255

# One-hot encode the labels
train_labels = np_utils.to_categorical(train_labels, num_classes=num_labels)
test_labels = np_utils.to_categorical(test_labels, num_classes=num_labels)

# Convolutional neural network for 48x48 grayscale face images
model = Sequential()
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', input_shape=(width, height, 1)))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(num_labels, activation='softmax'))

model.compile(loss=categorical_crossentropy,
              optimizer=Adam(),
              metrics=['accuracy'])

model.fit(train_images, train_labels,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(test_images, test_labels),
          shuffle=True)

# Save the trained weights and the architecture so we can reuse them later
model.save("your_model.h5")
with open("your_model.json", "w") as json_file:
    json_file.write(model.to_json())
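
As a quick sanity check (a sketch, assuming the file saved above and that test_images and test_labels are still in memory), we can reload the saved model with Keras and confirm that it reproduces the test accuracy:

from keras.models import load_model

# Reload the model we just saved and re-evaluate it on the held-out test set
reloaded_model = load_model("your_model.h5")
loss, accuracy = reloaded_model.evaluate(test_images, test_labels, verbose=0)
print(f"Reloaded model test accuracy: {accuracy:.2%}")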

In this Answer, we have pre-trained the model so that it can give you quick results on any video link you feed it.

Detecting emotions

Now that our model is ready, we can detect emotions in videos and watch them change from frame to frame!

import cv2
import numpy as np
from keras.models import model_from_json
from keras.utils import img_to_array

emotionList = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')

emotion_model = model_from_json(open("model_saved.json", "r").read())
emotion_model.load_weights('model_saved.h5')

face_haar_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

video_capture = cv2.VideoCapture("https://media.istockphoto.com/id/1152822473/video/perplexed-disappointed-woman-posing-on-blue-background.mp4?s=mp4-640x640-is&k=20&c=RR9tmNeW_AA83lQdCWTdIIDV_Q97mq9-DOA7lhw5HCk=")

while True:

    success, frame_image = video_capture.read()
    if cv2.waitKey(1) == 27 or not success:
        break

    gray_frame = cv2.cvtColor(frame_image, cv2.COLOR_BGR2GRAY)

    detections = face_haar_cascade.detectMultiScale(gray_frame, 1.32, 5)

    for (x, y, w, h) in detections:

        cv2.rectangle(frame_image, (x, y), (x+w, y+h), (0, 0, 0), thickness=7)

        face_region_gray = gray_frame[y:y+h, x:x+w]
        resized_face = cv2.resize(face_region_gray, (48, 48))
        face_pixels = img_to_array(resized_face)

        face_pixels = np.expand_dims(face_pixels, axis=0)
        face_pixels = face_pixels / 255

        predictions = emotion_model.predict(face_pixels)

        max_index = np.argmax(predictions[0])
        prediction = emotionList[max_index]

        text = f'Emotion: {prediction}'
        text_position = (x, y - 10)

        cv2.putText(frame_image, text, text_position, cv2.FONT_ITALIC, 1, (255, 255, 255), 2)

    cv2.imshow('Emotion detection - Computer vision', frame_image)

video_capture.release()
cv2.destroyAllWindows()

Code explanation

  • Lines 1–4: First, we import the necessary libraries for our code, including OpenCV for computer vision tasks, NumPy for numerical operations, and Keras to work with deep learning models.

  • Line 6: Next, we define the list of emotions as emotionList. These are all the emotions our model can detect.

  • Lines 8–9: Moving on, we load our pre-trained emotion detection model from model_saved.json for the model architecture and model_saved.h5 for model weights using Keras.

  • Line 11: Now, it's time to initialize the Haar Cascade Classifier, used for detecting faces in images or video frames.

  • Line 13: We now create a video capture object and load the video from the specified URL. This will be the source of our input frames for emotion detection. You can add any video you like.

  • Lines 17–24: Inside the loop, we read each frame from the video, convert it to grayscale using cv2.cvtColor, and save it in gray_frame. We then apply the Haar Cascade face detection algorithm using face_haar_cascade.detectMultiScale, with a scale factor of 1.32 and a minimum of 5 neighbors per candidate, to find faces in the frame and save them in detections.

  • Lines 26–27: For each detected face, we draw a black rectangle around it on the frame using cv2.rectangle.

  • Lines 29–34: Next, we preprocess the face region for emotion detection. We resize the face image using resize, convert it to a numerical array using img_to_array, add an extra dimension to match the expected input format of the model using np.expand_dims, and normalize the pixel values by dividing by 255. The final preprocessed pixels are saved in face_pixels.

  • Lines 36–39: Now, onto the most exciting part! We make emotion predictions using the pre-trained model, find the index of the highest probability with np.argmax, and save the corresponding emotion from emotionList in prediction (see the sketch after this list for printing the full probability ranking).

  • Lines 41–46: Finally, we add emotion text to the frame using cv2.putText and display it using cv2.imshow.

  • Lines 48–49: After processing all frames, we release the object using video_capture.release() and close all OpenCV windows using cv2.destroyAllWindows().
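
If you'd like to see the full probability distribution rather than just the top emotion, you can rank every class by its predicted score. Below is a small, self-contained sketch; the probability vector here is made up for illustration, and in the code above you would pass predictions[0] and emotionList instead.

import numpy as np

# Hypothetical helper: turn one softmax output vector into a readable ranking
def rank_emotions(probabilities, labels):
    order = np.argsort(probabilities)[::-1]  # indices sorted from highest to lowest score
    return [(labels[i], float(probabilities[i])) for i in order]

labels = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')
fake_scores = np.array([0.05, 0.02, 0.08, 0.55, 0.10, 0.15, 0.05])  # made-up example scores
for emotion, score in rank_emotions(fake_scores, labels):
    print(f"{emotion}: {score:.2%}")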

Execution with Flask

We've set up a simple Flask server to render the results here for you; you can also run the code on your local machine without integrating it with Flask. Click "Run" to see a live demonstration!

from flask import Flask, render_template, Response
import cv2
import numpy as np
from keras.models import model_from_json
from keras.utils import img_to_array

app = Flask(__name__)

emotionList = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')
emotion_model = model_from_json(open("model_saved.json", "r").read())
emotion_model.load_weights('model_saved.h5')

face_haar_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

def detect_emotions():
    video_capture = cv2.VideoCapture("https://media.istockphoto.com/id/1152822473/video/perplexed-disappointed-woman-posing-on-blue-background.mp4?s=mp4-640x640-is&k=20&c=RR9tmNeW_AA83lQdCWTdIIDV_Q97mq9-DOA7lhw5HCk=") 
    
    while True:
        success, frame_image = video_capture.read()
        if not success:
            break

        gray_frame = cv2.cvtColor(frame_image, cv2.COLOR_BGR2GRAY)

        detections = face_haar_cascade.detectMultiScale(gray_frame, 1.32, 5)

        for (x, y, w, h) in detections:
            cv2.rectangle(frame_image, (x, y), (x+w, y+h), (0, 0, 0), thickness=7)

            face_region_gray = gray_frame[y:y+h, x:x+w]
            resized_face = cv2.resize(face_region_gray, (48, 48))
            face_pixels = img_to_array(resized_face)
            
            face_pixels = np.expand_dims(face_pixels, axis=0)
            face_pixels = face_pixels / 255

            predictions = emotion_model.predict(face_pixels)

            max_index = np.argmax(predictions[0])
            prediction = emotionList[max_index]

            text = f'Emotion: {prediction}'
            text_position = (x, y - 10)

            cv2.putText(frame_image, text, text_position, cv2.FONT_ITALIC, 1, (255, 255, 255), 2)

        ret, buffer = cv2.imencode('.jpg', frame_image)
        frame = buffer.tobytes()
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')

    video_capture.release()

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/video_feed')
def video_feed():
    return Response(detect_emotions(), mimetype='multipart/x-mixed-replace; boundary=frame')

if __name__ == '__main__':
    app.run(debug = True, host = "0.0.0.0", port = 5000)
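
The Flask app renders index.html, which isn't shown in this Answer. Any template that embeds the /video_feed route in an img tag will work; here is one hypothetical way to generate a minimal templates/index.html from Python before starting the server:

import os

os.makedirs("templates", exist_ok=True)

# Minimal page: the browser keeps the <img> updated from the multipart JPEG stream
minimal_template = """<!DOCTYPE html>
<html>
  <head><title>Real-time emotion detection</title></head>
  <body>
    <h1>Real-time emotion detection</h1>
    <img src="{{ url_for('video_feed') }}" alt="Emotion detection stream">
  </body>
</html>
"""

with open(os.path.join("templates", "index.html"), "w") as f:
    f.write(minimal_template)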

Video emotion detection output

Yay, we're done with the emotion detection code! Feel free to try it out on different videos to see how well it identifies emotions in faces.

Emotion detection using your webcam

We can reuse most of the code above and simply change the video source so that it reads directly from our machine's camera.

You can also make it work using an external camera attached to your machine; a small sketch for finding its index follows the note below!

import cv2
import numpy as np
from keras.models import model_from_json
from keras.utils import img_to_array

emotionList = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')

emotion_model = model_from_json(open("model_saved.json", "r").read())
emotion_model.load_weights('model_saved.h5')

face_haar_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# Index 0 selects the machine's default camera
video_capture = cv2.VideoCapture(0)

while True:
    success, frame_image = video_capture.read()
    if cv2.waitKey(1) == 27 or not success:
        break

    gray_frame = cv2.cvtColor(frame_image, cv2.COLOR_BGR2GRAY)

    detections = face_haar_cascade.detectMultiScale(gray_frame, 1.32, 5)

    for (x, y, w, h) in detections:
        cv2.rectangle(frame_image, (x, y), (x+w, y+h), (0, 0, 0), thickness=7)

        face_region_gray = gray_frame[y:y+h, x:x+w]
        resized_face = cv2.resize(face_region_gray, (48, 48))
        face_pixels = img_to_array(resized_face)
        face_pixels = np.expand_dims(face_pixels, axis=0)
        face_pixels = face_pixels / 255

        predictions = emotion_model.predict(face_pixels)
        max_index = np.argmax(predictions[0])
        prediction = emotionList[max_index]

        text = f'Emotion: {prediction}'
        text_position = (x, y - 10)
        cv2.putText(frame_image, text, text_position, cv2.FONT_ITALIC, 1, (255, 255, 255), 2)

    cv2.imshow('Emotion detection - Computer vision', frame_image)

video_capture.release()
cv2.destroyAllWindows()
Code for emotion detection in web cameras

Note: Run this code on your local machine so that your code can connect to the webcam.
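
If you have more than one camera, external devices usually appear at index 1, 2, and so on. Here is a small sketch (OpenCV only; the indexes are machine-dependent) that probes the first few indexes to see which cameras respond:

import cv2

def find_cameras(max_index=4):
    available = []
    for index in range(max_index):
        capture = cv2.VideoCapture(index)
        if capture.isOpened():  # a camera responded at this index
            available.append(index)
        capture.release()
    return available

print(find_cameras())  # e.g., [0, 1] when a built-in webcam and an external camera are attached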

Benefits of emotion detection models

Benefits and applications of an emotion detection model

Note: Here's the complete list of related projects in MediaPipe or deep learning.

  1. Real time 3D face mesh

  2. Gesture recognizer

  3. Language detection

  4. Pose detection

  5. Emotion detection

  6. Real time emotion detection

