# Faceoff / app.py
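"""Faceoff: a Streamlit app that estimates emotion from an uploaded video.

The app scores facial expressions frame-by-frame with a ViT image classifier
and the audio track with a wav2vec2 audio classifier, then reports one
dominant label per modality.
"""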
import streamlit as st
import cv2
import numpy as np
import moviepy.editor as mp
from transformers import (
    ViTImageProcessor,
    ViTForImageClassification,
    pipeline
)
import torch
# 1. Load Models (cached so Streamlit reruns don't re-download weights)
@st.cache_resource
def load_models():
    # Visual model.
    # NOTE: the original checkpoint, google/vit-base-patch16-224, is an
    # ImageNet-1k object classifier and returns object labels rather than
    # emotions; trpakov/vit-face-expression (assumed to be available on the
    # Hub) is a facial-expression fine-tune that matches the app's stated goal.
    vit_processor = ViTImageProcessor.from_pretrained('trpakov/vit-face-expression')
    vit_model = ViTForImageClassification.from_pretrained('trpakov/vit-face-expression')
    # Audio model.
    # NOTE: the original pointed at speechbrain/emotion-recognition-wav2vec2-IEMOCAP,
    # a SpeechBrain checkpoint that the transformers pipeline cannot load;
    # superb/wav2vec2-base-superb-er is a transformers-native model trained on
    # the same IEMOCAP emotion-recognition task.
    audio_analyzer = pipeline(
        "audio-classification",
        model="superb/wav2vec2-base-superb-er"
    )
    return vit_processor, vit_model, audio_analyzer
# 2. Processing Functions
def analyze_frame(frame, processor, model):
    """Classify one RGB frame and return the predicted emotion label."""
    inputs = processor(images=frame, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    return model.config.id2label[outputs.logits.argmax(-1).item()]
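# (Illustrative addition, not in the original app.) If the UI should also
# report how confident the visual model is, the logits can be softmaxed into
# probabilities; analyze_frame_with_score is a hypothetical helper showing that.
def analyze_frame_with_score(frame, processor, model):
    inputs = processor(images=frame, return_tensors="pt")
    with torch.no_grad():
        logits = model(**inputs).logits
    probs = torch.softmax(logits, dim=-1)
    idx = probs.argmax(-1).item()
    return model.config.id2label[idx], probs[0, idx].item()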
def process_video(video_path, processor, model, audio_analyzer):
    # Extract the audio track to a scratch WAV file
    video = mp.VideoFileClip(video_path)
    audio_path = "temp_audio.wav"
    video.audio.write_audiofile(audio_path)
    video.close()
    # Analyze audio: the pipeline returns a list of {'label', 'score'} dicts
    audio_result = audio_analyzer(audio_path)
    audio_emotion = max(audio_result, key=lambda x: x['score'])['label']
    # Analyze video frames
    cap = cv2.VideoCapture(video_path)
    emotions = []
    frame_idx = 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Score every 10th frame to keep runtime manageable (the stride of 10
        # is an assumed tweak; the original scored every frame)
        if frame_idx % 10 == 0:
            # OpenCV decodes frames as BGR; the ViT processor expects RGB
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            emotions.append(analyze_frame(frame, processor, model))
        frame_idx += 1
    cap.release()
    return {
        'audio': audio_emotion,
        # Majority vote over the per-frame labels
        'visual': max(set(emotions), key=emotions.count)
    }
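# (Hypothetical helper, not in the original app.) collections.Counter gives an
# O(n) majority vote versus the max(set(...), key=list.count) idiom above, and
# it degrades gracefully when no frames were scored.
def dominant_label(labels):
    from collections import Counter
    return Counter(labels).most_common(1)[0][0] if labels else "unknown"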
# 3. Streamlit UI
st.title("Video Sentiment Analyzer πŸŽ₯")
st.markdown("""
Analyze emotions from:
- Facial expressions (ViT model)
- Audio tone (wav2vec2 model)
""")
uploaded_file = st.file_uploader("Upload video (max 200MB)", type=["mp4", "avi"])
if uploaded_file:
    # Save to temp file so OpenCV/MoviePy can read it by path
    with open("temp_video.mp4", "wb") as f:
        f.write(uploaded_file.getbuffer())
    # Load models
    vit_processor, vit_model, audio_analyzer = load_models()
    # Process video
    with st.spinner("Analyzing video..."):
        result = process_video(
            "temp_video.mp4",
            vit_processor,
            vit_model,
            audio_analyzer
        )
    # Display results
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("🎧 Audio Analysis")
        st.metric("Emotion", result['audio'])
    with col2:
        st.subheader("πŸ‘οΈ Visual Analysis")
        st.metric("Dominant Emotion", result['visual'])
    st.success("Analysis complete!")
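    # (Assumed addition, not in the original app.) Remove scratch files so
    # repeated runs don't leave stale artifacts; the paths match the
    # hard-coded names used above.
    import os
    for tmp_path in ("temp_video.mp4", "temp_audio.wav"):
        if os.path.exists(tmp_path):
            os.remove(tmp_path)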