import streamlit as st
import cv2
import numpy as np
import moviepy.editor as mp
from transformers import (
    ViTImageProcessor,
    ViTForImageClassification,
    pipeline
)
import torch

# 1. Load Models
@st.cache_resource
def load_models():
    # Visual model (google/vit-base-patch16-224, an ImageNet classifier)
    vit_processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
    vit_model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')
    # Audio model. NOTE: this repo hosts a SpeechBrain checkpoint; if
    # transformers' pipeline cannot load it, a pipeline-compatible
    # alternative such as "superb/wav2vec2-base-superb-er" can be swapped in.
    audio_analyzer = pipeline(
        "audio-classification",
        model="speechbrain/emotion-recognition-wav2vec2-IEMOCAP"
    )
    return vit_processor, vit_model, audio_analyzer

# 2. Processing Functions
def analyze_frame(frame, processor, model):
    # Classify a single RGB frame and return the top predicted label
    inputs = processor(images=frame, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    return model.config.id2label[outputs.logits.argmax(-1).item()]
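
# Quick standalone sanity check (a sketch; assumes a local test.jpg exists):
#   from PIL import Image
#   processor, model, _ = load_models()
#   img = np.array(Image.open("test.jpg").convert("RGB"))
#   print(analyze_frame(img, processor, model))  # prints the top ViT label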

def process_video(video_path, processor, model, audio_analyzer):
    # Extract audio (guard against clips that have no audio track)
    video = mp.VideoFileClip(video_path)
    if video.audio is not None:
        audio_path = "temp_audio.wav"
        video.audio.write_audiofile(audio_path)
        # Analyze audio: the pipeline returns a list of {label, score} dicts
        audio_result = audio_analyzer(audio_path)
        audio_emotion = max(audio_result, key=lambda x: x['score'])['label']
    else:
        audio_emotion = "no audio track"
    video.close()
    # Analyze video frames (every frame is classified, which can be slow;
    # see the sampling sketch below)
    cap = cv2.VideoCapture(video_path)
    emotions = []
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        emotions.append(analyze_frame(frame, processor, model))
    cap.release()
    return {
        'audio': audio_emotion,
        'visual': max(set(emotions), key=emotions.count) if emotions else 'unknown'
    }
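
# The loop above runs ViT on every frame. A minimal alternative sketch,
# assuming classifying roughly one frame per second is acceptable (the
# function name and `frames_per_second` parameter are illustrative, not
# part of the original app):
def dominant_visual_emotion(video_path, processor, model, frames_per_second=1):
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0  # fall back if FPS metadata is missing
    step = max(int(fps // frames_per_second), 1)
    emotions, index = [], 0
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if index % step == 0:
            rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            emotions.append(analyze_frame(rgb, processor, model))
        index += 1
    cap.release()
    return max(set(emotions), key=emotions.count) if emotions else "unknown"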

# 3. Streamlit UI
st.title("Video Sentiment Analyzer 🎥")
st.markdown("""
Analyze emotions from:
- Facial expressions (ViT model)
- Audio tone (wav2vec2 model)
""")
uploaded_file = st.file_uploader("Upload video (max 200MB)", type=["mp4", "avi"])

if uploaded_file:
    # Save to temp file
    with open("temp_video.mp4", "wb") as f:
        f.write(uploaded_file.getbuffer())
    # Load models (cached across reruns by st.cache_resource)
    vit_processor, vit_model, audio_analyzer = load_models()
    # Process video
    with st.spinner("Analyzing video..."):
        result = process_video(
            "temp_video.mp4",
            vit_processor,
            vit_model,
            audio_analyzer
        )
    # Display results
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("🎧 Audio Analysis")
        st.metric("Emotion", result['audio'])
    with col2:
        st.subheader("👁️ Visual Analysis")
        st.metric("Dominant Emotion", result['visual'])
    st.success("Analysis complete!")
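
# How to run (a sketch, assuming this file is saved as app.py; package names
# are the usual PyPI distributions for the imports above, and moviepy is
# pinned below 2.0 because the moviepy.editor module was removed in 2.x):
#   pip install streamlit opencv-python "moviepy<2" transformers torch
#   streamlit run app.py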