import streamlit as st
import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from facenet_pytorch import MTCNN
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
from PIL import Image
from collections import Counter
import tempfile
import os
# Load models: MTCNN for face detection, ViT for facial expression classification
device = 'cuda' if torch.cuda.is_available() else 'cpu'
mtcnn = MTCNN(device=device)
model = AutoModelForImageClassification.from_pretrained("trpakov/vit-face-expression").to(device)
extractor = AutoFeatureExtractor.from_pretrained("trpakov/vit-face-expression")
# Emotion labels (AffectNet ordering, kept for reference; predictions below
# use the model's own config.id2label mapping)
affectnet_labels = {
    0: "neutral", 1: "happy", 2: "sad", 3: "surprise", 4: "fear",
    5: "disgust", 6: "anger", 7: "contempt"
}
def detect_emotions(frame):
    """Detects the facial emotion in a given BGR frame."""
    img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
    # MTCNN returns bounding boxes (or None) and detection probabilities
    boxes, _ = mtcnn.detect(img)
    if boxes is None or len(boxes) == 0:
        return "No Face Detected"
    # Crop the first detected face; box coordinates are floats, so cast to int
    face = img.crop([int(c) for c in boxes[0]])
    inputs = extractor(images=face, return_tensors="pt").to(device)
    with torch.no_grad():
        outputs = model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    return model.config.id2label[torch.argmax(probs).item()]
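# Quick sanity check (illustrative sketch only, not executed by the app):
# detect_emotions accepts any BGR image loaded with OpenCV, so the classifier
# can be tried on a single still image before processing a whole video.
# The file name "test_face.jpg" below is hypothetical.
#
#   test_frame = cv2.imread("test_face.jpg")
#   print(detect_emotions(test_frame))   # e.g. "happy" or "No Face Detected"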
def process_video(input_path):
    """Processes a video, overlays the detected emotion on each frame, and builds a summary chart."""
    cap = cv2.VideoCapture(input_path)
    fps = int(cap.get(cv2.CAP_PROP_FPS)) or 25  # fall back to 25 fps if the property is unavailable
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Create the output video file
    output_path = "output_video.mp4"
    out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))

    emotion_counts = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        emotion = detect_emotions(frame)
        emotion_counts.append(emotion)

        # Overlay the emotion label on a semi-transparent banner
        overlay = frame.copy()
        cv2.rectangle(overlay, (10, 10), (350, 80), (255, 255, 255), -1)
        cv2.putText(overlay, f'Emotion: {emotion}', (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
        cv2.addWeighted(overlay, 0.6, frame, 0.4, 0, frame)
        out.write(frame)

    cap.release()
    out.release()

    # Find the most frequent emotion across all frames
    emotion_counter = Counter(emotion_counts)
    major_emotion = emotion_counter.most_common(1)[0][0] if emotion_counter else "No Face Detected"

    # Generate the emotion distribution pie chart
    plt.figure(figsize=(5, 5))
    if emotion_counter:
        labels, sizes = zip(*emotion_counter.items())
        plt.pie(sizes, labels=labels, autopct='%1.1f%%', colors=sns.color_palette('pastel'))
    plt.title("Emotion Distribution")
    plt.savefig("emotion_distribution.jpg")

    return output_path, plt, major_emotion
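# process_video can also be driven from a plain Python session (illustrative
# sketch; "sample.mp4" is a hypothetical local file). It returns the path of
# the annotated video, the matplotlib module holding the pie chart, and the
# most frequent emotion label.
#
#   video_out, chart, top_emotion = process_video("sample.mp4")
#   print(top_emotion)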
# Streamlit Web Interface
st.set_page_config(page_title="Emotion Analysis from Video", layout="wide")
st.title("Emotion Analysis from Video")
st.markdown("Upload a video, and the AI will detect emotions in each frame, providing a processed video, an emotion distribution chart, and the major detected emotion.")

# File uploader
video_input = st.file_uploader("Upload Video (MP4, MOV, AVI)", type=["mp4", "mov", "avi"])

if video_input is not None:
    # Save uploaded video to a temporary file
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
        tmp_file.write(video_input.read())
        video_path = tmp_file.name

    # Process video
    if st.button("Analyze"):
        with st.spinner("Processing video..."):
            output_video, emotion_chart, major_emotion = process_video(video_path)

        # Display results
        st.subheader("Processed Video")