# NOTE: removed non-code residue scraped from the Hugging Face Spaces page
# header ("Spaces: Sleeping") — it was not part of the program.
import os
from collections import Counter

import cv2
import moviepy.editor as mp
import numpy as np
import streamlit as st
import torch
from transformers import (
    ViTImageProcessor,
    ViTForImageClassification,
    pipeline,
)
# 1. Load Models
@st.cache_resource  # cache across Streamlit reruns so the models are loaded only once
def load_models():
    """Load the visual and audio emotion models.

    Returns:
        tuple: (vit_processor, vit_model, audio_analyzer) where the first two
        are the ViT image processor/classifier and the last is a Hugging Face
        audio-classification pipeline.
    """
    # Visual model — NOTE(review): this is the generic ImageNet-1k ViT
    # checkpoint, not an emotion-specific one; confirm it is intended.
    vit_processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224')
    vit_model = ViTForImageClassification.from_pretrained('google/vit-base-patch16-224')
    # Audio emotion-recognition model (IEMOCAP-trained wav2vec2)
    audio_analyzer = pipeline(
        "audio-classification",
        model="speechbrain/emotion-recognition-wav2vec2-IEMOCAP"
    )
    return vit_processor, vit_model, audio_analyzer
# 2. Processing Functions
def analyze_frame(frame, processor, model):
    """Classify one RGB frame and return the predicted emotion label."""
    encoded = processor(images=frame, return_tensors="pt")
    # Inference only — no gradients needed.
    with torch.no_grad():
        logits = model(**encoded).logits
    predicted_idx = logits.argmax(-1).item()
    return model.config.id2label[predicted_idx]
def process_video(video_path, processor, model, audio_analyzer, frame_step=1):
    """Analyze a video's audio track and frames for emotion.

    Args:
        video_path: path to the video file on disk.
        processor: ViT image processor.
        model: ViT classification model.
        audio_analyzer: Hugging Face audio-classification pipeline.
        frame_step: analyze every Nth frame (default 1 = every frame,
            matching the original behavior; raise it to speed up long videos).

    Returns:
        dict: {'audio': top audio emotion label,
               'visual': most frequent frame label, or None if no frames
               could be decoded}.
    """
    # Extract the audio track to a temporary WAV for the audio pipeline.
    audio_path = "temp_audio.wav"
    video = mp.VideoFileClip(video_path)
    try:
        video.audio.write_audiofile(audio_path)
    finally:
        video.close()  # release ffmpeg reader handles (was leaked before)

    try:
        audio_result = audio_analyzer(audio_path)
        audio_emotion = max(audio_result, key=lambda x: x['score'])['label']
    finally:
        # Remove the temp WAV even if the analyzer raises (was leaked before).
        if os.path.exists(audio_path):
            os.remove(audio_path)

    # Analyze video frames.
    cap = cv2.VideoCapture(video_path)
    emotions = []
    try:
        idx = 0
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            if idx % frame_step == 0:
                # OpenCV decodes BGR; the ViT processor expects RGB.
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                emotions.append(analyze_frame(frame, processor, model))
            idx += 1
    finally:
        cap.release()  # release even if frame analysis raises

    # Counter avoids the O(n^2) set/count scan and the ValueError the old
    # max(set(...)) raised when no frames were decoded.
    counts = Counter(emotions)
    return {
        'audio': audio_emotion,
        'visual': counts.most_common(1)[0][0] if counts else None,
    }
# 3. Streamlit UI
# (emoji below were mojibake in the scraped source — restored to real glyphs)
st.title("Video Sentiment Analyzer 🎥")
st.markdown("""
Analyze emotions from:
- Facial expressions (ViT model)
- Audio tone (wav2vec2 model)
""")
uploaded_file = st.file_uploader("Upload video (max 200MB)", type=["mp4", "avi"])
if uploaded_file:
    # Persist the upload to disk so OpenCV/moviepy can open it by path.
    with open("temp_video.mp4", "wb") as f:
        f.write(uploaded_file.getbuffer())
    # Load models (cached by Streamlit if load_models is decorated).
    vit_processor, vit_model, audio_analyzer = load_models()
    # Process video
    with st.spinner("Analyzing video..."):
        result = process_video(
            "temp_video.mp4",
            vit_processor,
            vit_model,
            audio_analyzer
        )
    # Display results side by side.
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("🎧 Audio Analysis")
        st.metric("Emotion", result['audio'])
    with col2:
        st.subheader("👁️ Visual Analysis")
        st.metric("Dominant Emotion", result['visual'])
    st.success("Analysis complete!")