Spaces:

Sagnik1750
/

Faceoff

Running

App Files Files Community

Sagnik1750 committed on Mar 7

Commit

c3b2feb

verified ·

1 Parent(s): a24463d

Create app.py

Browse files

Files changed (1) hide show

app.py +96 -0

app.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import contextlib
+import os
+import tempfile
+from collections import Counter
+
+import cv2
+import moviepy.editor as mp
+import numpy as np
+import streamlit as st
+import torch
+from transformers import (
+    ViTImageProcessor,
+    ViTForImageClassification,
+    pipeline
+)
+# 1. Load Models
+@st.cache_resource
+def load_models():
+    # Visual model
+    vit_processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
+    vit_model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')
+    # Audio model
+    audio_analyzer = pipeline(
+        "audio-classification",
+        model="speechbrain/emotion-recognition-wav2vec2-IEMOCAP"
+    )
+    return vit_processor, vit_model, audio_analyzer
+# 2. Processing Functions
+def analyze_frame(frame, processor, model):
+    inputs = processor(images=frame, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs)
+    return model.config.id2label[outputs.logits.argmax(-1).item()]
+def process_video(video_path, processor, model, audio_analyzer):
+    # Extract audio
+    video = mp.VideoFileClip(video_path)
+    audio_path = "temp_audio.wav"
+    video.audio.write_audiofile(audio_path)
+    # Analyze audio
+    audio_result = audio_analyzer(audio_path)
+    audio_emotion = max(audio_result, key=lambda x: x['score'])['label']
+    # Analyze video frames
+    cap = cv2.VideoCapture(video_path)
+    emotions = []
+    while cap.isOpened():
+        ret, frame = cap.read()
+        if not ret: break
+        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+        emotions.append(analyze_frame(frame, processor, model))
+    cap.release()
+    return {
+        'audio': audio_emotion,
+        'visual': max(set(emotions), key=emotions.count)
+    }
+# 3. Streamlit UI
+st.title("Video Sentiment Analyzer 🎥")
+st.markdown("""
+Analyze emotions from:
+- Facial expressions (ViT model)
+- Audio tone (wav2vec2 model)
+""")
+uploaded_file = st.file_uploader("Upload video (max 200MB)", type=["mp4", "avi"])
+if uploaded_file:
+    # Save to temp file
+    with open("temp_video.mp4", "wb") as f:
+        f.write(uploaded_file.getbuffer())
+    # Load models
+    vit_processor, vit_model, audio_analyzer = load_models()
+    # Process video
+    with st.spinner("Analyzing video..."):
+        result = process_video(
+            "temp_video.mp4",
+            vit_processor,
+            vit_model,
+            audio_analyzer
+        )
+    # Display results
+    col1, col2 = st.columns(2)
+    with col1:
+        st.subheader("🎧 Audio Analysis")
+        st.metric("Emotion", result['audio'])
+    with col2:
+        st.subheader("👁️ Visual Analysis")
+        st.metric("Dominant Emotion", result['visual'])
+    st.success("Analysis complete!")