Sagnik1750 committed
Commit 7237c76 · verified · 1 Parent(s): c3ee3ee

Update app.py

Files changed (1)
  1. app.py +93 -116
app.py CHANGED
@@ -1,130 +1,107 @@
- # app.py
  import streamlit as st
  import cv2
- import numpy as np
- import moviepy.editor as mp
- from transformers import ViTImageProcessor, ViTForImageClassification
  import torch
- from speechbrain.pretrained import EncoderClassifier
  import tempfile
  import os

- # 1. Load Models with caching
- @st.cache_resource
- def load_models():
-     # Load ViT model for facial emotion detection
-     vit_processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
-     vit_model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')
-
-     # Load SpeechBrain model for audio emotion recognition
-     audio_classifier = EncoderClassifier.from_hparams(
-         source="speechbrain/emotion-recognition-wav2vec2-IEMOCAP",
-         savedir="pretrained_models/emotion-audio"
-     )
-
-     return vit_processor, vit_model, audio_classifier
-
- # 2. Video Processing Functions
- def analyze_frame(frame, processor, model):
-     """Analyze single frame using ViT model"""
-     inputs = processor(images=frame, return_tensors="pt")
-     with torch.no_grad():
-         outputs = model(**inputs)
-     return model.config.id2label[outputs.logits.argmax(-1).item()]
-
- def process_video(video_path, processor, model, audio_classifier):
-     """Process video and return combined results"""
-     # Extract audio from video
-     video = mp.VideoFileClip(video_path)
-     with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_audio:
-         audio_path = tmp_audio.name
-         video.audio.write_audiofile(audio_path)
-
-     # Analyze audio
-     audio_signal = audio_classifier.load_audio(audio_path)
-     audio_prediction = audio_classifier.classify_batch(audio_signal)
-     audio_emotion = audio_prediction[3][0]
-
-     # Analyze video frames
-     cap = cv2.VideoCapture(video_path)
-     emotions = []

-     # Process every 5th frame to reduce computation
-     frame_count = 0
      while cap.isOpened():
          ret, frame = cap.read()
          if not ret:
              break
-
-         if frame_count % 5 == 0:  # Sample every 5th frame
-             frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-             emotions.append(analyze_frame(frame_rgb, processor, model))
-
-         frame_count += 1
-
      cap.release()
-     os.unlink(audio_path)  # Clean up temporary audio file
-
-     # Get most common visual emotion
-     visual_emotion = max(set(emotions), key=emotions.count)
-
-     return {
-         'audio_emotion': audio_emotion,
-         'visual_emotion': visual_emotion,
-         'frame_emotions': emotions
-     }
-
- # 3. Streamlit UI
- st.set_page_config(page_title="Video Sentiment Analyzer", layout="wide")
-
- st.title("🎥 Video Sentiment Analysis")
- st.markdown("""
- Analyze emotions from:
- - **Facial Expressions** using ViT (Vision Transformer)
- - **Speech Tone** using wav2vec2
- """)
-
- uploaded_file = st.file_uploader("Upload a video file (max 30 seconds)", type=["mp4", "mov", "avi"])
-
- if uploaded_file:
-     # Display video preview
-     st.video(uploaded_file)
-
-     # Save to temporary file
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
-         tmp_video.write(uploaded_file.getbuffer())
-         video_path = tmp_video.name
-
-     # Load models
-     vit_processor, vit_model, audio_classifier = load_models()
-
      # Process video
-     with st.spinner("Analyzing video content..."):
-         try:
-             results = process_video(video_path, vit_processor, vit_model, audio_classifier)
-         finally:
-             os.unlink(video_path)  # Clean up temporary video file
-
-     # Display results
-     col1, col2 = st.columns(2)
-
-     with col1:
-         st.subheader("🎧 Audio Analysis")
-         st.metric("Dominant Emotion", results['audio_emotion'])
-
-     with col2:
-         st.subheader("👁️ Visual Analysis")
-         st.metric("Dominant Emotion", results['visual_emotion'])
-
-     # Show emotion timeline
-     st.subheader("📈 Emotion Timeline")
-     st.line_chart(
-         data={ "Frame Emotions": results['frame_emotions'] },
-         use_container_width=True
-     )
-
-     st.success("Analysis complete!")

- # Footer
- st.markdown("---")
- st.markdown("Built with [Hugging Face](https://huggingface.co/) 🤗 & [Streamlit](https://streamlit.io/) 🎈")
  import streamlit as st
  import cv2
  import torch
+ import numpy as np
+ import matplotlib.pyplot as plt
+ import seaborn as sns
+ from facenet_pytorch import MTCNN
+ from transformers import AutoFeatureExtractor, AutoModelForImageClassification
+ from PIL import Image
+ from collections import Counter
  import tempfile
  import os

+ # Load models
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ mtcnn = MTCNN(device=device)
+ model = AutoModelForImageClassification.from_pretrained("trpakov/vit-face-expression").to(device)
+ extractor = AutoFeatureExtractor.from_pretrained("trpakov/vit-face-expression")
+
+ # Emotion labels
+ affectnet_labels = {
+     0: "neutral", 1: "happy", 2: "sad", 3: "surprise", 4: "fear",
+     5: "disgust", 6: "anger", 7: "contempt"
+ }
+
+ def detect_emotions(frame):
+     """Detects facial emotions in a given frame."""
+     img = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
+     faces, _ = mtcnn.detect(img)
+     if faces is None or len(faces) == 0:
+         return "No Face Detected"
+
+     face = img.crop(faces[0])
+     inputs = extractor(images=face, return_tensors="pt").to(device)
+     outputs = model(**inputs)
+     probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
+
+     return model.config.id2label[torch.argmax(probs).item()]
+
+ def process_video(input_path):
+     """Processes video, overlays emotions, and creates a summary chart."""
+     cap = cv2.VideoCapture(input_path)
+     fps = int(cap.get(cv2.CAP_PROP_FPS))
+     frame_width, frame_height = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)), int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+     # Create a temporary output video file
+     output_path = "output_video.mp4"
+     out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (frame_width, frame_height))
+
+     emotion_counts = []

      while cap.isOpened():
          ret, frame = cap.read()
          if not ret:
              break
+
+         emotion = detect_emotions(frame)
+         emotion_counts.append(emotion)
+
+         # Overlay emotion
+         overlay = frame.copy()
+         cv2.rectangle(overlay, (10, 10), (350, 80), (255, 255, 255), -1)
+         cv2.putText(overlay, f'Emotion: {emotion}', (20, 50), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)
+         cv2.addWeighted(overlay, 0.6, frame, 0.4, 0, frame)
+
+         out.write(frame)
+
      cap.release()
+     out.release()
+     cv2.destroyAllWindows()
+
+     # Find major emotion
+     emotion_counter = Counter(emotion_counts)
+     major_emotion = emotion_counter.most_common(1)[0][0] if emotion_counter else "No Face Detected"
+
+     # Generate emotion distribution pie chart
+     plt.figure(figsize=(5, 5))
+     labels, sizes = zip(*emotion_counter.items())
+     plt.pie(sizes, labels=labels, autopct='%1.1f%%', colors=sns.color_palette('pastel'))
+     plt.title("Emotion Distribution")
+     plt.savefig("emotion_distribution.jpg")
+
+     return output_path, plt, major_emotion
+
+ # Streamlit Web Interface
+ st.set_page_config(page_title="Emotion Analysis from Video", layout="wide")
+
+ st.title("🎭 Emotion Analysis from Video 🎥")
+ st.markdown("Upload a video, and the AI will detect emotions in each frame, providing a processed video, an emotion distribution chart, and the major detected emotion.")
+
+ # File uploader
+ video_input = st.file_uploader("📤 Upload Video (MP4, MOV, AVI)", type=["mp4", "mov", "avi"])
+
+ if video_input is not None:
+     # Save uploaded video to a temporary file
+     with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_file:
+         tmp_file.write(video_input.read())
+         video_path = tmp_file.name
+
      # Process video
+     if st.button("🚀 Analyze"):
+         with st.spinner("Processing video..."):
+             output_video, emotion_chart, major_emotion = process_video(video_path)

+         # Display results
+         st.subheader("📥 Processed Video")
+
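
The diff replaces the ViT-base + SpeechBrain audio pipeline with per-frame face detection via MTCNN followed by classification with trpakov/vit-face-expression. For reference, below is a minimal, self-contained sketch of that inference path on a single image. It is illustrative only and not part of the commit; the input path face.jpg is a placeholder.

# Illustrative sketch (not from the commit): single-image emotion classification
# using the same MTCNN + trpakov/vit-face-expression pipeline app.py applies per frame.
import torch
from PIL import Image
from facenet_pytorch import MTCNN
from transformers import AutoFeatureExtractor, AutoModelForImageClassification

device = "cuda" if torch.cuda.is_available() else "cpu"
mtcnn = MTCNN(device=device)
extractor = AutoFeatureExtractor.from_pretrained("trpakov/vit-face-expression")
model = AutoModelForImageClassification.from_pretrained("trpakov/vit-face-expression").to(device)

img = Image.open("face.jpg").convert("RGB")   # placeholder input image
boxes, _ = mtcnn.detect(img)                  # bounding boxes, or None if no face is found
if boxes is None:
    print("No Face Detected")
else:
    face = img.crop(tuple(boxes[0]))          # crop the first detected face
    inputs = extractor(images=face, return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    print(model.config.id2label[logits.argmax(-1).item()])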