Real-time emotion detection is a fascinating technology that lets us analyze videos, or even our own faces in a live webcam feed, and predict the emotions within each frame! In this Answer, we'll explore how to implement real-time emotion detection and experiment with it.
By an emotion detection application, we mean an application that can intelligently recognize the primary emotion on a person's face by observing their facial expressions.
Note: To understand how we detect emotions in single images, read this Answer first.
Just as we detect emotions in a single image, we can capture each frame of a continuous video and apply the same emotion detection technique to it. This lets us see how emotions change throughout the video instead of reporting just a single emotion. The sketch below illustrates the idea.
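Here is a minimal sketch of that per-frame idea, assuming a hypothetical detect_emotion() helper and a placeholder file name; the full pipeline (face detection plus a CNN prediction) appears later in this Answer.

import cv2

# Minimal sketch: run a per-frame emotion detector over a video stream.
# detect_emotion() is a hypothetical stand-in for the full pipeline shown later.
def detect_emotion(frame):
    return "neutral"  # placeholder prediction

video = cv2.VideoCapture("sample_video.mp4")  # placeholder file name

while True:
    success, frame = video.read()
    if not success:                    # stop when the video ends
        break
    emotion = detect_emotion(frame)    # classify this frame
    print(emotion)                     # in the real app, we overlay this on the frame

video.release()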
As with our emotion detection application for images, our code uses several Python libraries, the most prominent ones being Keras and OpenCV.
Before writing the main code, we set up our model using a training and a testing dataset. Below is the code we used to build and train the model. We also save the model so that we don't have to retrain it every time.
import os
import cv2
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.losses import categorical_crossentropy
from keras.optimizers import Adam
from keras.utils import np_utils

# Emotion classes (one subfolder per class); keep this ordering consistent
# with the emotionList used at prediction time
emotions = ['angry', 'disgusted', 'fearful', 'happy', 'neutral', 'sad', 'surprised']

num_features = 64
num_labels = len(emotions)
batch_size = 64
epochs = 50
width, height = 48, 48

# Load every grayscale image from the emotion subfolders and record its label
def load_images_from_folder(folder_path):
    images = []
    labels = []
    for emotion_idx, emotion in enumerate(emotions):
        emotion_folder = os.path.join(folder_path, emotion)
        for filename in os.listdir(emotion_folder):
            img = cv2.imread(os.path.join(emotion_folder, filename), cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img = cv2.resize(img, (width, height))
                images.append(img)
                labels.append(emotion_idx)
    return np.array(images), np.array(labels)

train_images, train_labels = load_images_from_folder("train")
test_images, test_labels = load_images_from_folder("test")

# Reshape to (samples, 48, 48, 1) and scale pixel values to [0, 1]
train_images = train_images.reshape(train_images.shape[0], width, height, 1).astype('float32')
test_images = test_images.reshape(test_images.shape[0], width, height, 1).astype('float32')
train_images /= 255
test_images /= 255

# One-hot encode the labels
train_labels = np_utils.to_categorical(train_labels, num_classes=num_labels)
test_labels = np_utils.to_categorical(test_labels, num_classes=num_labels)

# Convolutional neural network for 48x48 grayscale face images
model = Sequential()
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', input_shape=(width, height, 1)))
model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(num_labels, activation='softmax'))

model.compile(loss=categorical_crossentropy,
              optimizer=Adam(),
              metrics=['accuracy'])

model.fit(train_images, train_labels,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(test_images, test_labels),
          shuffle=True)

# Save the trained weights and architecture so the detection code can load them later
model.save("model_saved.h5")
with open("model_saved.json", "w") as json_file:
    json_file.write(model.to_json())
In this Answer, we have pre-trained the model so that it can give you quick results on any video link you feed it.
Now that our model is ready, we can detect emotions in videos and watch how they change throughout the clip!
import cv2
import numpy as np
from keras.models import model_from_json
from keras.utils import img_to_array

emotionList = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')

emotion_model = model_from_json(open("model_saved.json", "r").read())
emotion_model.load_weights('model_saved.h5')

face_haar_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

video_capture = cv2.VideoCapture("https://media.istockphoto.com/id/1152822473/video/perplexed-disappointed-woman-posing-on-blue-background.mp4?s=mp4-640x640-is&k=20&c=RR9tmNeW_AA83lQdCWTdIIDV_Q97mq9-DOA7lhw5HCk=")

while True:
    # Read the next frame; stop when the video ends or the Esc key is pressed
    success, frame_image = video_capture.read()

    if cv2.waitKey(1) == 27 or not success:
        break

    gray_frame = cv2.cvtColor(frame_image, cv2.COLOR_BGR2GRAY)

    detections = face_haar_cascade.detectMultiScale(gray_frame, 1.32, 5)

    for (x, y, w, h) in detections:
        cv2.rectangle(frame_image, (x, y), (x+w, y+h), (0, 0, 0), thickness=7)

        face_region_gray = gray_frame[y:y+h, x:x+w]
        resized_face = cv2.resize(face_region_gray, (48, 48))
        face_pixels = img_to_array(resized_face)
        face_pixels = np.expand_dims(face_pixels, axis=0)

        face_pixels = face_pixels / 255

        predictions = emotion_model.predict(face_pixels)
        max_index = np.argmax(predictions[0])
        prediction = emotionList[max_index]

        # Overlay the predicted emotion just above the face rectangle
        text = f'Emotion: {prediction}'
        text_position = (x, y - 10)

        cv2.putText(frame_image, text, text_position, cv2.FONT_ITALIC, 1, (255, 255, 255), 2)

    cv2.imshow('Emotion detection - Computer vision', frame_image)

video_capture.release()
cv2.destroyAllWindows()
Lines 1–4: First, we import the necessary libraries for our code, including OpenCV for computer vision tasks, NumPy for numerical operations, and Keras to work with deep learning models.
Line 6: Next, we define the list of emotions as emotionList. These are all the emotions our model can detect.
Lines 8–9: Moving on, we load our pre-trained emotion detection model with Keras, reading the architecture from model_saved.json and the weights from model_saved.h5.
Line 11: Now, it's time to initialize the Haar Cascade Classifier, used for detecting faces in images or video frames.
Line 13: We now create a video capture object and load the video from the specified URL. This will be the source of our input frames for emotion detection. You can add any video you like.
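Any source that OpenCV's VideoCapture accepts works here, for example, a local video file (the file name below is just a placeholder) or a webcam index, which we cover at the end of this Answer:

import cv2

# Alternative video sources (the file name is a placeholder)
video_capture = cv2.VideoCapture("my_clip.mp4")   # local video file
# video_capture = cv2.VideoCapture(0)             # default webcam (covered later)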
Lines 17–24: Inside the loop, we read each frame from the video, convert it to grayscale using cv2.cvtColor, and save it in gray_frame. We then apply the Haar cascade face detection algorithm using face_haar_cascade.detectMultiScale to find faces in the frame and save them in detections. The loop exits when a frame can't be read (the video has ended) or the Esc key (key code 27) is pressed.
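For reference, the two numbers we pass to detectMultiScale are its scaleFactor and minNeighbors parameters. Written with keyword arguments, the same call from our code looks like this, and both values can be tuned:

# Same objects as in the code above; only the argument names are made explicit
detections = face_haar_cascade.detectMultiScale(
    gray_frame,
    scaleFactor=1.32,   # how much the image is scaled down at each detection pass
    minNeighbors=5      # how many neighboring detections are needed to keep a face
)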
Lines 26–27: For each detected face, we draw a black rectangle around it on the frame using cv2.rectangle.
Lines 29–34: Next, we preprocess the face region for emotion detection. We resize the face image using resize, convert it to a numerical array using img_to_array, add an extra dimension to match the expected input format of the model using np.expand_dims, and normalize the pixel values by dividing by 255. The final preprocessed pixels are saved in face_pixels.
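To see what this preprocessing produces, here is a small standalone sketch that uses a random array in place of a real face crop and prints the final shape. It mirrors the steps above but is only an illustration:

import numpy as np
import cv2

# Stand-in for a cropped grayscale face region of arbitrary size
face_region_gray = np.random.randint(0, 256, (120, 95), dtype=np.uint8)

resized_face = cv2.resize(face_region_gray, (48, 48))    # (48, 48)
face_pixels = resized_face.astype('float32')
face_pixels = np.expand_dims(face_pixels, axis=-1)       # (48, 48, 1) - channel axis
face_pixels = np.expand_dims(face_pixels, axis=0)        # (1, 48, 48, 1) - batch axis
face_pixels = face_pixels / 255                          # scale pixels to [0, 1]

print(face_pixels.shape)  # (1, 48, 48, 1), the input shape the model expects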
Line 36: Now, onto the most exciting part! We make emotion predictions using the pre-trained model, find the one with the highest probability from emotionList, and save it in prediction. We do this using max_index, which points to the emotion with the highest probability.
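As a tiny illustration of this step, here is a made-up probability vector where the highest score sits at index 3, which argmax maps to 'happy' in emotionList:

import numpy as np

emotionList = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')

# Made-up output of model.predict() for a single face (probabilities sum to 1)
predictions = np.array([[0.05, 0.02, 0.08, 0.60, 0.10, 0.05, 0.10]])

max_index = np.argmax(predictions[0])   # index of the highest probability -> 3
prediction = emotionList[max_index]     # 'happy'
print(prediction)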
Lines 41–46: Finally, we add the emotion text to the frame using cv2.putText and display the frame using cv2.imshow.
Lines 48–49: After processing all frames, we release the video capture object using video_capture.release() and close all OpenCV windows using cv2.destroyAllWindows().
We've set up a simple Flask server to render the results here for you; you can also run the code on your local machine without integrating it with Flask. Click "Run" to see a live demonstration!
from flask import Flask, render_template, Response
import cv2
import numpy as np
from keras.models import model_from_json
from keras.utils import img_to_array

app = Flask(__name__)

emotionList = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')

emotion_model = model_from_json(open("model_saved.json", "r").read())
emotion_model.load_weights('model_saved.h5')

face_haar_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

def detect_emotions():
    video_capture = cv2.VideoCapture("https://media.istockphoto.com/id/1152822473/video/perplexed-disappointed-woman-posing-on-blue-background.mp4?s=mp4-640x640-is&k=20&c=RR9tmNeW_AA83lQdCWTdIIDV_Q97mq9-DOA7lhw5HCk=")

    while True:
        success, frame_image = video_capture.read()
        if not success:
            break

        gray_frame = cv2.cvtColor(frame_image, cv2.COLOR_BGR2GRAY)
        detections = face_haar_cascade.detectMultiScale(gray_frame, 1.32, 5)

        for (x, y, w, h) in detections:
            cv2.rectangle(frame_image, (x, y), (x+w, y+h), (0, 0, 0), thickness=7)

            face_region_gray = gray_frame[y:y+h, x:x+w]
            resized_face = cv2.resize(face_region_gray, (48, 48))
            face_pixels = img_to_array(resized_face)
            face_pixels = np.expand_dims(face_pixels, axis=0)
            face_pixels = face_pixels / 255

            predictions = emotion_model.predict(face_pixels)
            max_index = np.argmax(predictions[0])
            prediction = emotionList[max_index]

            text = f'Emotion: {prediction}'
            text_position = (x, y - 10)
            cv2.putText(frame_image, text, text_position, cv2.FONT_ITALIC, 1, (255, 255, 255), 2)

        # Encode the annotated frame as JPEG and stream it as part of a multipart response
        ret, buffer = cv2.imencode('.jpg', frame_image)
        frame = buffer.tobytes()
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + frame + b'\r\n')

    video_capture.release()

@app.route('/')
def index():
    return render_template('index.html')

@app.route('/video_feed')
def video_feed():
    return Response(detect_emotions(), mimetype='multipart/x-mixed-replace; boundary=frame')

if __name__ == '__main__':
    app.run(debug=True, host="0.0.0.0", port=5000)
Yay, we're done with the emotion detection code! Feel free to try it out on different videos to see how well it identifies emotions in faces.
We can reuse most of the code above and simply replace the video source with our machine's camera.
You can also make it work using an external camera attached to your machine!
import cv2
import numpy as np
from keras.models import model_from_json
from keras.utils import img_to_array

emotionList = ('angry', 'disgust', 'fear', 'happy', 'sad', 'surprise', 'neutral')

emotion_model = model_from_json(open("model_saved.json", "r").read())
emotion_model.load_weights('model_saved.h5')

face_haar_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml')

# Device index 0 points to the machine's default webcam
video_capture = cv2.VideoCapture(0)

while True:
    success, frame_image = video_capture.read()

    # Stop when no frame is returned or the Esc key is pressed
    if cv2.waitKey(1) == 27 or not success:
        break

    gray_frame = cv2.cvtColor(frame_image, cv2.COLOR_BGR2GRAY)
    detections = face_haar_cascade.detectMultiScale(gray_frame, 1.32, 5)

    for (x, y, w, h) in detections:
        cv2.rectangle(frame_image, (x, y), (x+w, y+h), (0, 0, 0), thickness=7)

        face_region_gray = gray_frame[y:y+h, x:x+w]
        resized_face = cv2.resize(face_region_gray, (48, 48))
        face_pixels = img_to_array(resized_face)
        face_pixels = np.expand_dims(face_pixels, axis=0)
        face_pixels = face_pixels / 255

        predictions = emotion_model.predict(face_pixels)
        max_index = np.argmax(predictions[0])
        prediction = emotionList[max_index]

        text = f'Emotion: {prediction}'
        text_position = (x, y - 10)
        cv2.putText(frame_image, text, text_position, cv2.FONT_ITALIC, 1, (255, 255, 255), 2)

    cv2.imshow('Emotion detection - Computer vision', frame_image)

video_capture.release()
cv2.destroyAllWindows()
Note: Run this code on your local machine so that your code can connect to the webcam.
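If you want to use an external camera instead, OpenCV usually exposes it at a different device index. A simple approach (the index can vary from machine to machine) is to try index 1 and fall back to the built-in webcam:

import cv2

# Try an external camera at device index 1; fall back to the built-in webcam (index 0)
video_capture = cv2.VideoCapture(1)
if not video_capture.isOpened():
    video_capture = cv2.VideoCapture(0)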
Note: Here's the complete list of related MediaPipe and deep learning projects.