Update app.py
app.py
CHANGED
@@ -1,483 +1,111 @@
Old version (lines removed in this commit are prefixed with "-"):

 import cv2
-import mediapipe as mp
 import torch
 import numpy as np
 import matplotlib.pyplot as plt
 import seaborn as sns
 from facenet_pytorch import MTCNN
-from transformers import AutoFeatureExtractor, AutoModelForImageClassification
 from PIL import Image
-import moviepy.editor as moviepy
-import librosa
 import os
-
-import tempfile

-#
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
-print(f"Using device: {device}")
-
-# Initialize visual models
-mp_pose = mp.solutions.pose
-pose = mp_pose.Pose(static_image_mode=False, min_detection_confidence=0.5)
 mtcnn = MTCNN(device=device)
-
-
-
-# Initialize audio model
-audio_model_name = "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
-audio_processor = AutoFeatureExtractor.from_pretrained(audio_model_name)
-audio_model = AutoModelForAudioClassification.from_pretrained(audio_model_name).to(device)
-audio_sampling_rate = 16000
-import os
-
-def analyze_video(video_path):
-    if not video_path:
-        return "Error: No video input received!"
-
-    # Check file size (limit to ~50MB for Hugging Face Spaces)
-    max_size_mb = 50
-    file_size_mb = os.path.getsize(video_path) / (1024 * 1024)
-
-    if file_size_mb > max_size_mb:
-        return f"Error: File size ({file_size_mb:.2f}MB) exceeds the {max_size_mb}MB limit."

-
-
-
-
-
-    audio_emotion, _ = analyze_audio_emotion(transcription)
-    final_emotion = max(facial_emotions, key=facial_emotions.get) if facial_emotions else "Neutral"
-
-    return transcription, audio_emotion, final_emotion, facial_emotions, "emotion_pie_chart.png"
-
-
-def calculate_angle(a, b, c):
-    """Calculates the angle between three points."""
-    a, b, c = np.array(a), np.array(b), np.array(c)
-    ba, bc = a - b, c - b
-    cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
-    return np.degrees(np.arccos(np.clip(cosine_angle, -1.0, 1.0)))

 def detect_emotions(frame):
     """Detects facial emotions in a given frame."""
     img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
     faces, _ = mtcnn.detect(img)
-
     if faces is None or len(faces) == 0:
-        return "
-
-    face = img.crop((faces[0][0], faces[0][1], faces[0][2], faces[0][3]))
-    inputs = face_extractor(images=face, return_tensors="pt").to(device)
-    outputs = face_model(**inputs)
-    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
-    return face_model.config.id2label[torch.argmax(probs).item()]
-
-def classify_posture(back_angle, neck_angle):
-    """Classifies posture based on back and neck angles."""
-    if back_angle > 170 and neck_angle > 150:
-        return "Confident"
-    elif back_angle < 160 and neck_angle < 140:
-        return "Nervous"
-    elif back_angle < 150:
-        return "Defensive"
-    elif neck_angle < 130:
-        return "Serious"
-    else:
-        return "Attentive"
-
-def extract_audio(video_path):
-    """Extracts audio from video file and saves it as WAV."""
-    audio_path = tempfile.mktemp(suffix='.wav')
-    video = moviepy.VideoFileClip(video_path)
-    video.audio.write_audiofile(audio_path, codec='pcm_s16le', verbose=False, logger=None)
-    return audio_path
-
-def analyze_audio_emotion(audio_path):
-    """Analyzes emotion from audio file and returns emotion counts."""
-    # Load audio
-    y, sr = librosa.load(audio_path, sr=audio_sampling_rate)
-
-    # Process audio in chunks to avoid memory issues
-    chunk_length = audio_sampling_rate * 5  # 5 seconds
-    emotion_counts = {}
-    audio_emotions = []
-
-    # Process audio in chunks
-    for i in range(0, len(y), chunk_length):
-        chunk = y[i:min(i+chunk_length, len(y))]
-
-        # Skip chunks that are too short
-        if len(chunk) < audio_sampling_rate:
-            continue
-
-        # Process audio with the model
-        inputs = audio_processor(chunk, sampling_rate=audio_sampling_rate, return_tensors="pt").to(device)
-        with torch.no_grad():
-            outputs = audio_model(**inputs)
-
-        # Get prediction
-        predicted_class_id = torch.argmax(outputs.logits, dim=1).item()
-        emotion = audio_model.config.id2label[predicted_class_id]
-        audio_emotions.append(emotion)
-        emotion_counts[emotion] = emotion_counts.get(emotion, 0) + 1
-
-    return emotion_counts, audio_emotions
-
-def draw_multimodal_sentiment_bar(frame, face_emotion, posture_label, audio_emotion, major_emotion, major_emotion_percent):
-    """Draws multimodal emotion and posture sentiment on the frame."""
-    overlay = frame.copy()
-    cv2.rectangle(overlay, (10, 10), (450, 200), (255, 255, 255), -1)
-
-    # Display current emotions
-    cv2.putText(overlay, f'Face Emotion: {face_emotion}', (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
-    cv2.putText(overlay, f'Posture: {posture_label}', (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
-    cv2.putText(overlay, f'Audio Emotion: {audio_emotion}', (20, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
-
-    # Display major emotion
-    cv2.putText(overlay, f'Major Emotion: {major_emotion} ({major_emotion_percent:.1f}%)', (20, 130), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
-
-    # Add explanation
-    reason_text = 'Weighted combination of face, posture, and audio analysis'
-    cv2.putText(overlay, f'Analysis: {reason_text}', (20, 160), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
-
-    # Blend overlay with original frame
-    cv2.addWeighted(overlay, 0.6, frame, 0.4, 0, frame)
-
-def generate_multimodal_charts(face_emotion_counts, posture_counts, audio_emotion_counts):
-    """Generates charts for all emotion modalities."""
-    # Create a figure with 3 subplots
-    fig, axs = plt.subplots(1, 3, figsize=(18, 6))
-
-    # Face emotions pie chart
-    labels, sizes = zip(*face_emotion_counts.items()) if face_emotion_counts else (["None"], [1])
-    axs[0].pie(sizes, labels=labels, autopct='%1.1f%%', colors=sns.color_palette('Blues'))
-    axs[0].set_title("Facial Emotions")
-
-    # Posture pie chart
-    labels, sizes = zip(*posture_counts.items()) if posture_counts else (["None"], [1])
-    axs[1].pie(sizes, labels=labels, autopct='%1.1f%%', colors=sns.color_palette('Greens'))
-    axs[1].set_title("Posture Analysis")
-
-    # Audio emotions pie chart
-    labels, sizes = zip(*audio_emotion_counts.items()) if audio_emotion_counts else (["None"], [1])
-    axs[2].pie(sizes, labels=labels, autopct='%1.1f%%', colors=sns.color_palette('Reds'))
-    axs[2].set_title("Audio Emotions")
-
-    plt.tight_layout()
-
-    # Save to a temporary file
-    chart_path = tempfile.mktemp(suffix='.jpg')
-    plt.savefig(chart_path)
-    plt.close()
-
-    # Create combined emotions bar chart
-    plt.figure(figsize=(12, 6))
-
-    # Combine all emotions across modalities
-    all_emotions = set()
-    for counts in [face_emotion_counts, audio_emotion_counts]:
-        all_emotions.update(counts.keys())
-
-    # Prepare data for each emotion across modalities
-    emotions = list(all_emotions)
-    face_values = [face_emotion_counts.get(e, 0) for e in emotions]
-    audio_values = [audio_emotion_counts.get(e, 0) for e in emotions]
-
-    # Normalize values
-    if sum(face_values) > 0:
-        face_values = [v/sum(face_values)*100 for v in face_values]
-    if sum(audio_values) > 0:
-        audio_values = [v/sum(audio_values)*100 for v in audio_values]
-
-    # Create bar chart
-    x = np.arange(len(emotions))
-    width = 0.35
-
-    fig, ax = plt.subplots(figsize=(14, 8))
-    ax.bar(x - width/2, face_values, width, label='Face')
-    ax.bar(x + width/2, audio_values, width, label='Audio')
-
-    ax.set_title('Emotion Distribution by Modality')
-    ax.set_xlabel('Emotions')
-    ax.set_ylabel('Percentage (%)')
-    ax.set_xticks(x)
-    ax.set_xticklabels(emotions)
-    ax.legend()

-
-
-
-
-    plt.savefig(comparison_path)
-    plt.close()
-
-    return chart_path, comparison_path
-
-def calculate_combined_sentiment(face_emotion_counts, posture_counts, audio_emotion_counts):
-    """Calculates a combined sentiment score from all modalities."""
-    # Define emotion categories and weights
-    modality_weights = {
-        "face": 0.4,
-        "posture": 0.2,
-        "audio": 0.4
-    }
-
-    # Map posture labels to emotional states for better combination
-    posture_emotion_mapping = {
-        "Confident": "Happy",
-        "Nervous": "Fearful",
-        "Defensive": "Angry",
-        "Serious": "Neutral",
-        "Attentive": "Neutral"
-    }
-
-    # Convert posture counts to emotion counts
-    posture_emotion_counts = {}
-    for posture, count in posture_counts.items():
-        emotion = posture_emotion_mapping.get(posture, "Neutral")
-        posture_emotion_counts[emotion] = posture_emotion_counts.get(emotion, 0) + count
-
-    # Get all unique emotions across all modalities
-    all_emotions = set()
-    for counts in [face_emotion_counts, posture_emotion_counts, audio_emotion_counts]:
-        all_emotions.update(counts.keys())
-
-    # Calculate total frames/samples for each modality
-    face_total = sum(face_emotion_counts.values())
-    posture_total = sum(posture_counts.values())
-    audio_total = sum(audio_emotion_counts.values())
-
-    # Calculate weighted emotion scores
-    combined_scores = {}
-
-    for emotion in all_emotions:
-        # Get normalized scores from each modality (or 0 if not present)
-        face_score = face_emotion_counts.get(emotion, 0) / face_total if face_total > 0 else 0
-        posture_score = posture_emotion_counts.get(emotion, 0) / posture_total if posture_total > 0 else 0
-        audio_score = audio_emotion_counts.get(emotion, 0) / audio_total if audio_total > 0 else 0
-
-        # Calculate weighted score
-        weighted_score = (
-            face_score * modality_weights["face"] +
-            posture_score * modality_weights["posture"] +
-            audio_score * modality_weights["audio"]
-        )
-
-        combined_scores[emotion] = weighted_score
-
-    # Normalize to percentages
-    total_score = sum(combined_scores.values())
-    if total_score > 0:
-        for emotion in combined_scores:
-            combined_scores[emotion] = (combined_scores[emotion] / total_score) * 100
-
-    # Get the major emotion
-    major_emotion = max(combined_scores.items(), key=lambda x: x[1]) if combined_scores else ("Unknown", 0)

-    return

-def
-    """Processes
-
-    progress(0.1, "Extracting audio from video...")
-    audio_path = extract_audio(video_path)
-
-    # Analyze audio emotions
-    progress(0.2, "Analyzing audio emotions...")
-    audio_emotion_counts, audio_emotions_sequence = analyze_audio_emotion(audio_path)
-
-    # Process video frames
-    progress(0.3, "Starting video frame analysis...")
-    cap = cv2.VideoCapture(video_path)
     fps = int(cap.get(cv2.CAP_PROP_FPS))
     frame_width, frame_height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-
-    # Create a temporary file for the output video
-    output_path = tempfile.mktemp(suffix='.mp4')
-    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
-
-    # Initialize counters
-    face_emotion_counts = {}
-    posture_counts = {}
-    total_frames = 0
-    frame_index = 0

-
-    total_video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-
-    # For very long videos, we might want to sample frames
-    sample_rate = max(1, total_video_frames // 300)  # Process at most ~300 frames
-
-    # Calculate frames per audio segment
-    audio_segments = len(audio_emotions_sequence)
-    frames_per_audio = max(1, total_video_frames // audio_segments) if audio_segments > 0 else 1
-    current_audio_index = 0

-    # Current audio emotion
-    current_audio_emotion = audio_emotions_sequence[0] if audio_emotions_sequence else "Unknown"
-
     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break

-
-
-
-
-
-
-
-
-
-
-        # Track the frame
-        total_frames += 1
-
-        # Update current audio emotion based on frame index
-        current_audio_index = min(frame_index // frames_per_audio, len(audio_emotions_sequence) - 1)
-        if current_audio_index >= 0 and current_audio_index < len(audio_emotions_sequence):
-            current_audio_emotion = audio_emotions_sequence[current_audio_index]
-
-        # Process the frame for face and posture
-        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-        result = pose.process(rgb_frame)
-
-        posture_label = "Unknown"
-        if result.pose_landmarks:
-            landmarks = result.pose_landmarks.landmark
-            try:
-                shoulder = [landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER].x, landmarks[mp_pose.PoseLandmark.LEFT_SHOULDER].y]
-                hip = [landmarks[mp_pose.PoseLandmark.LEFT_HIP].x, landmarks[mp_pose.PoseLandmark.LEFT_HIP].y]
-                knee = [landmarks[mp_pose.PoseLandmark.LEFT_KNEE].x, landmarks[mp_pose.PoseLandmark.LEFT_KNEE].y]
-                ear = [landmarks[mp_pose.PoseLandmark.LEFT_EAR].x, landmarks[mp_pose.PoseLandmark.LEFT_EAR].y]
-
-                back_angle = calculate_angle(shoulder, hip, knee)
-                neck_angle = calculate_angle(ear, shoulder, hip)
-                posture_label = classify_posture(back_angle, neck_angle)
-            except:
-                # If any landmark is missing, use default
-                posture_label = "Unknown"
-
-        # Update posture counts
-        posture_counts[posture_label] = posture_counts.get(posture_label, 0) + 1
-
-        # Detect face emotion
-        try:
-            face_emotion = detect_emotions(frame)
-        except Exception as e:
-            face_emotion = "Neutral"
-            print(f"Face detection error: {e}")
-
-        # Update face emotion counts
-        face_emotion_counts[face_emotion] = face_emotion_counts.get(face_emotion, 0) + 1
-
-        # Calculate current major emotion
-        combined_scores, major_emotion, major_emotion_percent = calculate_combined_sentiment(
-            face_emotion_counts, posture_counts, audio_emotion_counts
-        )
-
-        # Draw sentiment info on the frame
-        draw_multimodal_sentiment_bar(frame, face_emotion, posture_label, current_audio_emotion, major_emotion, major_emotion_percent)
-
-        # Write the frame to output video
         out.write(frame)
-
-    # Release resources
     cap.release()
     out.release()

-
-
-    chart_path, comparison_path = generate_multimodal_charts(face_emotion_counts, posture_counts, audio_emotion_counts)
-
-    # Clean up temporary audio file
-    try:
-        os.remove(audio_path)
-    except:
-        pass
-
-    progress(1.0, "Analysis complete!")
-
-    # Prepare result summary
-    combined_scores, major_emotion, major_emotion_percent = calculate_combined_sentiment(
-        face_emotion_counts, posture_counts, audio_emotion_counts
-    )
-
-    result_summary = f"""
-    # Video Sentiment Analysis Results
-
-    ## Overall Sentiment
-    The dominant emotion in this video is: **{major_emotion}** ({major_emotion_percent:.1f}%)
-
-    ## Emotion Distribution
-
-    ### Face Emotions:
-    {', '.join([f"{emotion}: {count}" for emotion, count in face_emotion_counts.items()])}
-
-    ### Posture Analysis:
-    {', '.join([f"{posture}: {count}" for posture, count in posture_counts.items()])}
-
-    ### Audio Emotions:
-    {', '.join([f"{emotion}: {count}" for emotion, count in audio_emotion_counts.items()])}
-
-    ### Combined Emotion Scores:
-    {', '.join([f"{emotion}: {score:.1f}%" for emotion, score in combined_scores.items()])}
-    """
-
-    return output_path, chart_path, comparison_path, result_summary

-
-
-
-    gr.Markdown("# Multimodal Video Sentiment Analysis")
-    gr.Markdown("""
-    This app analyzes videos for emotions using three modalities:
-    - **Facial Expressions**: Detects emotions from faces
-    - **Body Posture**: Identifies emotional cues from posture
-    - **Audio Tone**: Analyzes voice for emotional content
-
-    Upload a video to see the combined analysis!
-    """)
-
-    with gr.Row():
-        with gr.Column(scale=1):
-            video_input = gr.Video(label="Upload Video")
-            analyze_btn = gr.Button("Analyze Video", variant="primary")
-
-        with gr.Column(scale=2):
-            with gr.Tabs():
-                with gr.TabItem("Results Summary"):
-                    result_text = gr.Markdown(label="Analysis Results")
-
-                with gr.TabItem("Processed Video"):
-                    video_output = gr.Video(label="Processed Video")
-
-                with gr.TabItem("Emotion Charts"):
-                    chart_output = gr.Image(label="Emotion Distribution")
-                    comparison_output = gr.Image(label="Modality Comparison")
-
-    analyze_btn.click(
-        process_video_for_gradio,
-        inputs=[video_input],
-        outputs=[video_output, chart_output, comparison_output, result_text]
-    )
-
-    gr.Markdown("""
-    ## How it works
-
-    1. **Visual Analysis**: The app processes video frames to detect faces and body posture
-    2. **Audio Analysis**: The audio is extracted and analyzed for emotional tone
-    3. **Combined Analysis**: The results are weighted and combined for a holistic emotional assessment
-
-    The app uses pretrained models for each modality and combines their outputs using a weighted approach.
-    """)

-

-
-if __name__ == "__main__":
-    demo = create_gradio_interface()
-    demo.launch()
New version (lines added in this commit are prefixed with "+"):

+import gradio as gr
 import cv2
 import torch
 import numpy as np
+import mediapipe as mp
 import matplotlib.pyplot as plt
 import seaborn as sns
 from facenet_pytorch import MTCNN
+from transformers import AutoFeatureExtractor, AutoModelForImageClassification
 from PIL import Image
 import os
+from collections import Counter

+# Load models
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
 mtcnn = MTCNN(device=device)
+model = AutoModelForImageClassification.from_pretrained("trpakov/vit-face-expression").to(device)
+extractor = AutoFeatureExtractor.from_pretrained("trpakov/vit-face-expression")

+# Emotion labels
+affectnet_labels = {
+    0: "neutral", 1: "happy", 2: "sad", 3: "surprise", 4: "fear",
+    5: "disgust", 6: "anger", 7: "contempt"
+}

 def detect_emotions(frame):
     """Detects facial emotions in a given frame."""
     img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
     faces, _ = mtcnn.detect(img)
     if faces is None or len(faces) == 0:
+        return "No Face Detected"

+    face = img.crop(faces[0])
+    inputs = extractor(images=face, return_tensors="pt").to(device)
+    outputs = model(**inputs)
+    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)

+    return model.config.id2label[torch.argmax(probs).item()]

+def process_video(input_path):
+    """Processes video, overlays emotions, and creates a summary chart."""
+    cap = cv2.VideoCapture(input_path)
     fps = int(cap.get(cv2.CAP_PROP_FPS))
     frame_width, frame_height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+    out = cv2.VideoWriter("output_video.mp4", cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

+    emotion_counts = []

     while cap.isOpened():
         ret, frame = cap.read()
         if not ret:
             break

+        emotion = detect_emotions(frame)
+        emotion_counts.append(emotion)
+
+        # Overlay emotion
+        overlay = frame.copy()
+        cv2.rectangle(overlay, (10, 10), (350, 80), (255, 255, 255), -1)
+        cv2.putText(overlay, f'Emotion: {emotion}', (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
+        cv2.addWeighted(overlay, 0.6, frame, 0.4, 0, frame)
+
         out.write(frame)
+
     cap.release()
     out.release()
+    cv2.destroyAllWindows()
+
+    # Find major emotion
+    emotion_counter = Counter(emotion_counts)
+    major_emotion = emotion_counter.most_common(1)[0][0] if emotion_counter else "No Face Detected"
+
+    # Generate emotion distribution pie chart
+    plt.figure(figsize=(5, 5))
+    labels, sizes = zip(*emotion_counter.items())
+    plt.pie(sizes, labels=labels, autopct='%1.1f%%', colors=sns.color_palette('pastel'))
+    plt.title("Emotion Distribution")
+    plt.savefig("emotion_distribution.jpg")
+
+    return "output_video.mp4", plt, major_emotion
+
+# Gradio Web Interface
+with gr.Blocks(css="""
+.gradio-container { max-width: 750px !important; margin: auto; background-color: #f8f9fa; padding: 20px; border-radius: 15px; }
+.gradio-container h1 { font-size: 22px; text-align: center; color: #333; }
+.gradio-container .gr-button { background-color: #007bff; color: white; border-radius: 10px; padding: 8px 15px; }
+.gradio-container .gr-textbox { font-size: 16px; font-weight: bold; color: #007bff; }
+.gradio-container .gr-file { border-radius: 10px; padding: 5px; }
+@media screen and (max-width: 768px) {
+    .gradio-container { width: 100%; padding: 10px; }
+    .gradio-container h1 { font-size: 18px; }
+}
+""") as demo:
+    gr.Markdown("# Emotion Analysis from Video")
+    gr.Markdown("Upload a video, and the AI will detect emotions in each frame, providing a processed video, an emotion distribution chart, and the major detected emotion.")
+
+    with gr.Row():
+        video_input = gr.File(label="Upload Video (MP4, MOV, AVI)")

+    with gr.Row():
+        process_button = gr.Button("Analyze")

+    with gr.Row():
+        video_output = gr.File(label="Processed Video")
+        emotion_chart = gr.Plot(label="Emotion Distribution Chart")

+    major_emotion_output = gr.Textbox(label="Major Emotion Detected", interactive=False)
+
+    process_button.click(fn=process_video, inputs=video_input, outputs=[video_output, emotion_chart, major_emotion_output])

+demo.launch()
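One detail worth flagging in the new process_video: the second value it returns to gr.Plot is the plt module rather than a Figure object, and zip(*emotion_counter.items()) raises a ValueError if no frames were read. A minimal sketch of building the chart as an explicit Figure instead, assuming the same Gradio wiring (build_emotion_chart is a hypothetical helper, not part of this commit):

import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

def build_emotion_chart(emotion_labels):
    """Builds the pie chart as a Figure; gr.Plot can render a Matplotlib Figure directly."""
    counter = Counter(emotion_labels)
    fig, ax = plt.subplots(figsize=(5, 5))
    if counter:
        labels, sizes = zip(*counter.items())
        ax.pie(sizes, labels=labels, autopct='%1.1f%%', colors=sns.color_palette('pastel'))
    else:
        # Avoid unpacking an empty Counter when no frames were decoded
        ax.text(0.5, 0.5, "No frames analyzed", ha='center', va='center')
    ax.set_title("Emotion Distribution")
    fig.savefig("emotion_distribution.jpg")
    return fig

process_video could then return fig in place of plt, leaving the click wiring unchanged.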