Sagnik1750 committed
Commit 0340596 · verified · 1 Parent(s): b353d28

Delete app.py

Files changed (1): app.py +0 -326
app.py DELETED
@@ -1,326 +0,0 @@
- import cv2
- import mediapipe as mp
- import torch
- import numpy as np
- import matplotlib.pyplot as plt
- import seaborn as sns
- from facenet_pytorch import MTCNN
- from transformers import AutoFeatureExtractor, AutoModelForImageClassification, AutoProcessor, AutoModelForAudioClassification
- from PIL import Image
- import moviepy.editor as moviepy
- import librosa
- import os
-
- # Initialize device
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
-
- # Initialize visual models
- mp_pose = mp.solutions.pose
- pose = mp_pose.Pose()
- mtcnn = MTCNN(device=device)
- face_model = AutoModelForImageClassification.from_pretrained("trpakov/vit-face-expression").to(device)
- face_extractor = AutoFeatureExtractor.from_pretrained("trpakov/vit-face-expression")
-
- # Initialize audio model
- audio_model_name = "ehcalabres/wav2vec2-lg-xlsr-en-speech-emotion-recognition"
- audio_processor = AutoProcessor.from_pretrained(audio_model_name)
- audio_model = AutoModelForAudioClassification.from_pretrained(audio_model_name).to(device)
- audio_sampling_rate = 16000
-
- def calculate_angle(a, b, c):
-     """Calculates the angle between three points."""
-     a, b, c = np.array(a), np.array(b), np.array(c)
-     ba, bc = a - b, c - b
-     cosine_angle = np.dot(ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc))
-     return np.degrees(np.arccos(np.clip(cosine_angle, -1.0, 1.0)))
-
- def detect_emotions(frame):
-     """Detects facial emotions in a given frame."""
-     img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-     faces, _ = mtcnn.detect(img)
-
-     if faces is None or len(faces) == 0:
-         return "Neutral"  # Default to neutral if no face is detected
-
-     face = img.crop(faces[0])
-     inputs = face_extractor(images=face, return_tensors="pt").to(device)
-     outputs = face_model(**inputs)
-     probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
-     return face_model.config.id2label[torch.argmax(probs).item()]
-
- def classify_posture(back_angle, neck_angle):
-     """Classifies posture based on back and neck angles."""
-     if back_angle > 170 and neck_angle > 150:
-         return "Confident"
-     elif back_angle < 160 and neck_angle < 140:
-         return "Nervous"
-     elif back_angle < 150:
-         return "Defensive"
-     elif neck_angle < 130:
-         return "Serious"
-     else:
-         return "Attentive"
-
- def extract_audio(video_path):
-     """Extracts audio from video file and saves it as WAV."""
-     audio_path = "extracted_audio.wav"
-     video = moviepy.VideoFileClip(video_path)
-     video.audio.write_audiofile(audio_path, codec='pcm_s16le', verbose=False)
-     return audio_path
-
- def analyze_audio_emotion(audio_path):
-     """Analyzes emotion from audio file and returns emotion counts."""
-     # Load audio
-     y, sr = librosa.load(audio_path, sr=audio_sampling_rate)
-
-     # Process audio in chunks to avoid memory issues
-     chunk_length = audio_sampling_rate * 5  # 5 seconds
-     emotion_counts = {}
-     audio_emotions = []
-
-     # Process audio in chunks
-     for i in range(0, len(y), chunk_length):
-         chunk = y[i:min(i+chunk_length, len(y))]
-
-         # Skip chunks that are too short
-         if len(chunk) < audio_sampling_rate:
-             continue
-
-         # Process audio with the model
-         inputs = audio_processor(chunk, sampling_rate=audio_sampling_rate, return_tensors="pt").to(device)
-         with torch.no_grad():
-             outputs = audio_model(**inputs)
-
-         # Get prediction
-         predicted_class_id = torch.argmax(outputs.logits, dim=1).item()
-         emotion = audio_model.config.id2label[predicted_class_id]
-         audio_emotions.append(emotion)
-         emotion_counts[emotion] = emotion_counts.get(emotion, 0) + 1
-
-     return emotion_counts, audio_emotions
-
- def map_emotion_labels(emotion, source="face"):
-     """Standardizes emotion labels across different models."""
-     # Mapping dictionaries for different models
-     face_mapping = {
-         "happy": "Happy",
-         "sad": "Sad",
-         "angry": "Angry",
-         "surprise": "Surprised",
-         "fear": "Fearful",
-         "disgust": "Disgusted",
-         "neutral": "Neutral"
-     }
-
-     audio_mapping = {
-         "anger": "Angry",
-         "disgust": "Disgusted",
-         "fear": "Fearful",
-         "joy": "Happy",
-         "neutral": "Neutral",
-         "sadness": "Sad",
-         "surprise": "Surprised"
-     }
-
-     posture_mapping = {
-         "Confident": "Confident",
-         "Nervous": "Nervous",
-         "Defensive": "Defensive",
-         "Serious": "Serious",
-         "Attentive": "Attentive"
-     }
-
-     if source == "face":
-         return face_mapping.get(emotion.lower(), emotion)
-     elif source == "audio":
-         return audio_mapping.get(emotion.lower(), emotion)
-     elif source == "posture":
-         return posture_mapping.get(emotion, emotion)
-
-     return emotion
-
- def draw_multimodal_sentiment_bar(frame, face_emotion, posture_label, audio_emotion, major_emotion, major_emotion_percent):
-     """Draws multimodal emotion and posture sentiment on the frame."""
-     overlay = frame.copy()
-     cv2.rectangle(overlay, (10, 10), (450, 200), (255, 255, 255), -1)
-
-     # Display current emotions
-     cv2.putText(overlay, f'Face Emotion: {face_emotion}', (20, 40), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
-     cv2.putText(overlay, f'Posture: {posture_label}', (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
-     cv2.putText(overlay, f'Audio Emotion: {audio_emotion}', (20, 100), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 0, 255), 2)
-
-     # Display major emotion
-     cv2.putText(overlay, f'Major Emotion: {major_emotion} ({major_emotion_percent:.1f}%)', (20, 130), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (255, 0, 0), 2)
-
-     # Add explanation
-     reason_text = 'Weighted combination of face, posture, and audio analysis'
-     cv2.putText(overlay, f'Analysis: {reason_text}', (20, 160), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)
-
-     # Blend overlay with original frame
-     cv2.addWeighted(overlay, 0.6, frame, 0.4, 0, frame)
-
- def generate_multimodal_charts(face_emotion_counts, posture_counts, audio_emotion_counts):
-     """Generates charts for all emotion modalities."""
-     # Create a figure with 3 subplots
-     fig, axs = plt.subplots(1, 3, figsize=(18, 6))
-
-     # Face emotions pie chart
-     labels, sizes = zip(*face_emotion_counts.items()) if face_emotion_counts else (["None"], [1])
-     axs[0].pie(sizes, labels=labels, autopct='%1.1f%%', colors=sns.color_palette('Blues'))
-     axs[0].set_title("Facial Emotions")
-
-     # Posture pie chart
-     labels, sizes = zip(*posture_counts.items()) if posture_counts else (["None"], [1])
-     axs[1].pie(sizes, labels=labels, autopct='%1.1f%%', colors=sns.color_palette('Greens'))
-     axs[1].set_title("Posture Analysis")
-
-     # Audio emotions pie chart
-     labels, sizes = zip(*audio_emotion_counts.items()) if audio_emotion_counts else (["None"], [1])
-     axs[2].pie(sizes, labels=labels, autopct='%1.1f%%', colors=sns.color_palette('Reds'))
-     axs[2].set_title("Audio Emotions")
-
-     plt.tight_layout()
-     plt.savefig("multimodal_emotion_analysis.jpg")
-     plt.close()
-
-     # Create combined emotions bar chart
-     plt.figure(figsize=(12, 6))
-
-     # Combine all emotions across modalities
-     all_emotions = set()
-     for counts in [face_emotion_counts, audio_emotion_counts]:
-         all_emotions.update(counts.keys())
-
-     # Prepare data for each emotion across modalities
-     emotions = list(all_emotions)
-     face_values = [face_emotion_counts.get(e, 0) for e in emotions]
-     audio_values = [audio_emotion_counts.get(e, 0) for e in emotions]
-
-     # Normalize values
-     if sum(face_values) > 0:
-         face_values = [v/sum(face_values)*100 for v in face_values]
-     if sum(audio_values) > 0:
-         audio_values = [v/sum(audio_values)*100 for v in audio_values]
-
-     # Create bar chart
-     x = np.arange(len(emotions))
-     width = 0.35
-
-     fig, ax = plt.subplots(figsize=(14, 8))
-     ax.bar(x - width/2, face_values, width, label='Face')
-     ax.bar(x + width/2, audio_values, width, label='Audio')
-
-     ax.set_title('Emotion Distribution by Modality')
-     ax.set_xlabel('Emotions')
-     ax.set_ylabel('Percentage (%)')
-     ax.set_xticks(x)
-     ax.set_xticklabels(emotions)
-     ax.legend()
-
-     plt.tight_layout()
-     plt.savefig("emotion_comparison.jpg")
-     plt.close()
-
- def calculate_combined_sentiment(face_emotion_counts, posture_counts, audio_emotion_counts):
-     """Calculates a combined sentiment score from all modalities."""
-     # Define emotion categories and weights
-     modality_weights = {
-         "face": 0.4,
-         "posture": 0.2,
-         "audio": 0.4
-     }
-
-     # Map posture labels to emotional states for better combination
-     posture_emotion_mapping = {
-         "Confident": "Happy",
-         "Nervous": "Fearful",
-         "Defensive": "Angry",
-         "Serious": "Neutral",
-         "Attentive": "Neutral"
-     }
-
-     # Convert posture counts to emotion counts
-     posture_emotion_counts = {}
-     for posture, count in posture_counts.items():
-         emotion = posture_emotion_mapping.get(posture, "Neutral")
-         posture_emotion_counts[emotion] = posture_emotion_counts.get(emotion, 0) + count
-
-     # Get all unique emotions across all modalities
-     all_emotions = set()
-     for counts in [face_emotion_counts, posture_emotion_counts, audio_emotion_counts]:
-         all_emotions.update(counts.keys())
-
-     # Calculate total frames/samples for each modality
-     face_total = sum(face_emotion_counts.values())
-     posture_total = sum(posture_counts.values())
-     audio_total = sum(audio_emotion_counts.values())
-
-     # Calculate weighted emotion scores
-     combined_scores = {}
-
-     for emotion in all_emotions:
-         # Get normalized scores from each modality (or 0 if not present)
-         face_score = face_emotion_counts.get(emotion, 0) / face_total if face_total > 0 else 0
-         posture_score = posture_emotion_counts.get(emotion, 0) / posture_total if posture_total > 0 else 0
-         audio_score = audio_emotion_counts.get(emotion, 0) / audio_total if audio_total > 0 else 0
-
-         # Calculate weighted score
-         weighted_score = (
-             face_score * modality_weights["face"] +
-             posture_score * modality_weights["posture"] +
-             audio_score * modality_weights["audio"]
-         )
-
-         combined_scores[emotion] = weighted_score
-
-     # Normalize to percentages
-     total_score = sum(combined_scores.values())
-     if total_score > 0:
-         for emotion in combined_scores:
-             combined_scores[emotion] = (combined_scores[emotion] / total_score) * 100
-
-     # Get the major emotion
-     major_emotion = max(combined_scores.items(), key=lambda x: x[1]) if combined_scores else ("Unknown", 0)
-
-     return combined_scores, major_emotion[0], major_emotion[1]
-
- def process_video(input_path):
-     """Processes the video with multimodal sentiment analysis."""
-     # Extract audio first
-     print("Extracting audio from video...")
-     audio_path = extract_audio(input_path)
-
-     # Analyze audio emotions
-     print("Analyzing audio emotions...")
-     audio_emotion_counts, audio_emotions_sequence = analyze_audio_emotion(audio_path)
-
-     # Process video frames
-     print("Processing video frames...")
-     cap = cv2.VideoCapture(input_path)
-     fps = int(cap.get(cv2.CAP_PROP_FPS))
-     frame_width, frame_height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
-     out = cv2.VideoWriter("output_video.mp4", cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
-
-     # Initialize counters
-     face_emotion_counts = {}
-     posture_counts = {}
-     total_frames = 0
-     frame_index = 0
-
-     # Get total frames for progress tracking
-     total_video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
-
-     # Calculate frames per audio segment
-     audio_segments = len(audio_emotions_sequence)
-     frames_per_audio = max(1, total_video_frames // audio_segments) if audio_segments > 0 else 1
-     current_audio_index = 0
-
-     while cap.isOpened():
-         ret, frame = cap.read()
-         if not ret:
-             break
-
-         # Update progress
-         frame_index += 1
-         if frame_index % 30 == 0:  # Show progress every 30 frames
-             print(f"Processing frame {frame_index}/{total_video_frames}