Sagnik1750 committed on
Commit c3b2feb · verified · 1 Parent(s): a24463d

Create app.py

Files changed (1): app.py +96 -0
app.py ADDED
@@ -0,0 +1,96 @@
import streamlit as st
import cv2
import moviepy.editor as mp
from transformers import (
    ViTImageProcessor,
    ViTForImageClassification,
    pipeline
)
import torch

# 1. Load Models
@st.cache_resource
def load_models():
    # Visual model. NOTE: google/vit-base-patch16-224 predicts ImageNet
    # classes, not emotions; for genuine facial-expression labels this
    # should be swapped for a ViT checkpoint fine-tuned on an emotion dataset.
    vit_processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
    vit_model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

    # Audio model. NOTE: the SpeechBrain IEMOCAP checkpoint referenced in the
    # original commit is not loadable through the transformers pipeline API;
    # superb/wav2vec2-base-superb-er is a wav2vec2 emotion model that is.
    audio_analyzer = pipeline(
        "audio-classification",
        model="superb/wav2vec2-base-superb-er"
    )
    return vit_processor, vit_model, audio_analyzer

# 2. Processing Functions
def analyze_frame(frame, processor, model):
    # Classify a single RGB frame and return the top predicted label.
    inputs = processor(images=frame, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
    return model.config.id2label[outputs.logits.argmax(-1).item()]

def process_video(video_path, processor, model, audio_analyzer):
    # Extract audio, guarding against clips that have no audio track
    # (video.audio is None in that case and write_audiofile would crash).
    video = mp.VideoFileClip(video_path)
    audio_emotion = "no audio"
    if video.audio is not None:
        audio_path = "temp_audio.wav"
        video.audio.write_audiofile(audio_path, logger=None)
        audio_result = audio_analyzer(audio_path)
        audio_emotion = max(audio_result, key=lambda x: x['score'])['label']
    video.close()

    # Analyze video frames, sampling every 10th frame so longer clips
    # don't require minutes of per-frame ViT inference.
    cap = cv2.VideoCapture(video_path)
    emotions = []
    frame_idx = 0

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        if frame_idx % 10 == 0:
            # OpenCV decodes to BGR; the ViT processor expects RGB.
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            emotions.append(analyze_frame(frame, processor, model))
        frame_idx += 1

    cap.release()
    return {
        'audio': audio_emotion,
        'visual': max(set(emotions), key=emotions.count) if emotions else "no frames"
    }

# 3. Streamlit UI
st.title("Video Sentiment Analyzer 🎥")
st.markdown("""
Analyze emotions from:
- Facial expressions (ViT model)
- Audio tone (wav2vec2 model)
""")

uploaded_file = st.file_uploader("Upload video (max 200MB)", type=["mp4", "avi"])

if uploaded_file:
    # Save the upload to a temp file so OpenCV/moviepy can read it from disk
    with open("temp_video.mp4", "wb") as f:
        f.write(uploaded_file.getbuffer())

    # Load models (cached across reruns by st.cache_resource)
    vit_processor, vit_model, audio_analyzer = load_models()

    # Process video
    with st.spinner("Analyzing video..."):
        result = process_video(
            "temp_video.mp4",
            vit_processor,
            vit_model,
            audio_analyzer
        )

    # Display results side by side
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("🎧 Audio Analysis")
        st.metric("Emotion", result['audio'])

    with col2:
        st.subheader("👁️ Visual Analysis")
        st.metric("Dominant Emotion", result['visual'])

    st.success("Analysis complete!")
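
For anyone reproducing this Space locally, a minimal environment sketch; the package list and the moviepy pin are assumptions inferred from the imports in app.py, not specified anywhere in the commit:

# requirements.txt (assumed; inferred from app.py's imports)
streamlit
torch
transformers
opencv-python-headless
moviepy==1.0.3   # app.py uses the moviepy.editor API, which was removed in moviepy 2.x

# then run locally with:
#   streamlit run app.py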
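And a quick sanity check for the audio checkpoint swap flagged in the comments above, as a minimal sketch: the filename check_audio_model.py and the sample.wav path are placeholders, not part of this commit.

# check_audio_model.py — hypothetical helper, not part of this commit
from transformers import pipeline

analyzer = pipeline("audio-classification", model="superb/wav2vec2-base-superb-er")
result = analyzer("sample.wav")  # placeholder path to any short WAV clip
# the model uses IEMOCAP's four classes: "neu", "hap", "ang", "sad"
print(max(result, key=lambda x: x["score"])["label"])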