from transformers import (
    AutoModelForSequenceClassification,  # For text emotion detection model
    AutoTokenizer,
    pipeline,                            # For creating inference pipeline
)
from datetime import datetime
import matplotlib.pyplot as plt
import gradio as gr
from fer import FER
import cv2

# Dictionaries to store emotion data over time
text_dataDict = {"Time": [], "Emotion": [], "Confidence Score": []}
face_dataDict = {"Time": [], "Emotion": [], "Confidence Score": []}

# Load model and tokenizer directly from HuggingFace
emotionDetectModel = AutoModelForSequenceClassification.from_pretrained("borisn70/bert-43-multilabel-emotion-detection")
tokenizer = AutoTokenizer.from_pretrained("borisn70/bert-43-multilabel-emotion-detection")  # Load tokenizer directly from model
pipe = pipeline(task="text-classification", model=emotionDetectModel, tokenizer=tokenizer)
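# For reference, a Hugging Face text-classification pipeline returns a list of
# dicts with "label" and "score" keys; the label shown here is illustrative and
# not necessarily part of this model's actual label set:
#   pipe("I'm so happy today!")  ->  [{"label": "joy", "score": 0.98}]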

face_emotion_detector = FER()
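# FER's top_emotion(img) is expected to return an (emotion, score) tuple such as
# ("happy", 0.95), or (None, None) when no face is found; the None check in
# emotionAnalysis() below relies on that behavior (the example values are illustrative).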

localFormat = "%Y-%m-%d %H:%M:%S" #this is how the timestamp will be printed: year-month-day hour:minutes:seconds (24-hour time)
#currTime = datetime.now().astimezone().strftime(localFormat) #returns the current time in localFormat
#current_Time_Tuple = time.strptime(str(currTime), str(localFormat)) #creates a tuple that contains each part of the local format separately
#current_Time_In_Seconds = time.mktime(current_Time_Tuple) #converts the tuple into the number of seconds
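# If the epoch-seconds conversion sketched above is ever needed, one option using only
# datetime (no extra `time` import) might be (untested here):
#   seconds = datetime.strptime(currTime, localFormat).timestamp()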

def emotionAnalysis(message, face):
    """
    Main function that processes both text and facial emotions
    Args:
        message (str): User input text
        face: Image input from Gradio interface, can be either:
            - numpy.ndarray: Direct webcam capture (RGB or BGR format)
            - str: File path to uploaded image
    Returns:
        tuple: (str, plt) Contains the emotion results text and the updated plot
    """
    if message.lower() == "quit":
        return "Quitting...", displayResults()
    
    # Process text emotion
    result = pipe(message)
    text_emotion = result[0]["label"]
    text_score = result[0]["score"]
    words_timestamp = datetime.now().astimezone().strftime(localFormat)
    
    # Store text emotion data for plotting
    text_dataDict["Time"].append(words_timestamp)
    text_dataDict["Emotion"].append(text_emotion)
    text_dataDict["Confidence Score"].append(round(text_score, 2))
    
    face_timestamp = datetime.now().astimezone().strftime(localFormat)

    # Initialize with default values
    face_emotion = "No image"  # Default value
    face_score = 0.0

    if face is not None:
        try:
            # The docstring allows either a raw frame or a file path; load the file first if needed
            if isinstance(face, str):
                face = cv2.imread(face)  # cv2.imread yields a BGR ndarray
            img_rgb = cv2.cvtColor(face, cv2.COLOR_BGR2RGB)
            result = face_emotion_detector.top_emotion(img_rgb)
            print(result)
            if result[0] is not None:  # Only update if we got a valid result
                face_emotion, face_score = result
            else:
                face_emotion = "No face detected"
                face_score = 0.0
        except Exception as e:
            face_emotion = f"Error processing image: {str(e)}"
            face_score = 0.0

    print(face_emotion)
    # Store facial emotion data for plotting
    face_dataDict["Time"].append(face_timestamp)
    face_dataDict["Emotion"].append(face_emotion)  # Now face_emotion will always be a string
    face_dataDict["Confidence Score"].append(face_score)

    # Return both the text result and the updated plot
    return f"Text: {text_emotion} | Face: {face_emotion}", displayResults()

def displayResults():
    """
    Creates and returns a matplotlib plot showing emotion trends over time
    Returns:
        matplotlib.pyplot: Plot object showing emotion analysis results
    """
    # Create a new figure with specified size
    plt.figure(figsize=(10, 6))
    
    # Set up plot labels and title
    plt.title("Emotions Detected Through Facial Expressions and Text Over Time")
    plt.xlabel("Time")
    plt.ylabel("Emotions")

    #plot facial emotions versus time where time is on the x-axis
    plt.plot(face_dataDict["Time"], face_dataDict["Emotion"], marker='o', linestyle='-', label="Facial Emotions")

    #plot text emotions versus time where time is on the x-axis
    plt.plot(text_dataDict["Time"], text_dataDict["Emotion"], marker='o', linestyle='-', color='red', label="Text Emotions")

    #showing the graph and the legend
    plt.legend()
    plt.xticks(rotation=45)  # Rotate timestamps for better readability
    plt.tight_layout()       # Adjust layout to prevent label cutoff
    
    return plt


def process_webcam(img):
    """
    Pass the webcam frame through to the display; per-frame emotion drawing is
    currently disabled (see the commented-out code below)
    Args:
        img: Input image from webcam
    Returns:
        numpy.ndarray: The unmodified input frame
    """
    if img is None:
        return None
        
    try:
        # Convert to RGB for emotion detection
        #img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        
        # Detect faces and emotions
        #result = face_emotion_detector.detect_emotions(img_rgb)
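        # A minimal sketch of how the drawing could work if re-enabled (untested here;
        # it assumes detect_emotions() returns dicts with "box" and "emotions" keys):
        #for det in result:
        #    x, y, w, h = det["box"]
        #    top = max(det["emotions"], key=det["emotions"].get)
        #    cv2.rectangle(img, (x, y), (x + w, y + h), (0, 255, 0), 2)
        #    cv2.putText(img, top, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)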
        
        return img
    except Exception as e:
        print(f"Error processing image: {str(e)}")
        return img

'''
2 rows, 2 columns
column 1: inputs
    row 1, col 1 = user text input
    row 2, col 1 = camera live feed
column 2: outputs
    row 1, col 2 = emotion results
    row 2, col 2 = plt graph
'''
with gr.Blocks(title="Emotion Reader", theme=gr.themes.Soft()) as emotion_reader:
    gr.Markdown(
    """
    # Emotion Analysis from Text and Face
    ⚠️ This application will use your webcam to detect facial emotions. By using this app, you consent to webcam access. Type text and press Enter to analyze both text and facial emotions.
    
    Steps to use the app:
    
    1. Turn on the camera by clicking where it says "Click to Access Webcam" and allow access
        
    2. Click "Record" (use the dropdown arrow if you want to change your camera)
        
    3. Type a sentence into the text input box
        
    4. Press "Enter" to see your results
        
    5. In the "Emotion Results" box, you will see something like "Text: (emotion) | Face: (emotion)" and the timeline will update
        
    6. You can press "Stop" to turn off the camera or type "quit" as your message to stop the program
    """
    )
    with gr.Row():
        with gr.Column(): #user text input
            text_input = gr.Textbox(
                label="Type your message here. Type 'quit' to see final results.",
                placeholder="Enter text"
            )
        with gr.Column(): #emotion results
            emotion_result = gr.Textbox(label="Emotion Results")
    
    with gr.Row():
        with gr.Column(): #camera live feed
            input_img = gr.Image(label="Webcam Feed", sources="webcam")
        with gr.Column(): #plt graph
            output_img = gr.Image(label="Emotion Detection", visible=False)
            plot_output = gr.Plot(value=displayResults(), label="Emotion Timeline")
    
    # Stream webcam with emotion detection
    input_img.stream(
        process_webcam,
        inputs=input_img,
        outputs=output_img,
        time_limit=15,
        stream_every=0.1,
        concurrency_limit=30
    )
    
    # Process text input
    text_input.submit(
        emotionAnalysis,
        inputs=[text_input, output_img],
        outputs=[emotion_result, plot_output]
    )

# Launch the interface
if __name__ == "__main__":
    emotion_reader.launch()