Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import torch
|
3 |
+
from PIL import Image
|
4 |
+
import face_recognition
|
5 |
+
import faiss
|
6 |
+
from sentence_transformers import SentenceTransformer
|
7 |
+
from transformers import pipeline
|
8 |
+
import cv2
|
9 |
+
import numpy as np
|
10 |
+
import subprocess
|
11 |
+
import tempfile
|
12 |
+
import os
|
13 |
+
import yt_dlp
|
14 |
+
from moviepy.editor import VideoFileClip
|
15 |
+
|
16 |
+
# Helper functions
|
17 |
+
def get_video_id(url):
    """Return the YouTube video ID contained in *url*.

    Supported forms:
      * ``.../watch?v=<id>`` (the ``v`` parameter may sit anywhere in the query)
      * ``youtu.be/<id>`` short links
      * ``.../shorts/<id>``, ``.../embed/<id>``, ``.../live/<id>``, ``.../v/<id>``

    Args:
        url: The video URL as typed by the user; a missing scheme is tolerated.

    Returns:
        The video ID as a string.

    Raises:
        ValueError: If no video ID can be found in *url*.
    """
    # Local import keeps this helper self-contained.
    from urllib.parse import parse_qs, urlparse

    text = (url or "").strip()
    # urlparse only recognises the host part when a scheme is present.
    parsed = urlparse(text if "://" in text else f"https://{text}")

    # Standard watch URL: take the ``v`` query parameter. Parsing the query
    # (rather than splitting on "v=") also drops any trailing "#fragment".
    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]

    segments = [part for part in parsed.path.split("/") if part]
    host = (parsed.hostname or "").lower()
    if host in ("youtu.be", "www.youtu.be") and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
        return segments[1]

    # Legacy fallback: preserve the historical "split on v=" behaviour for any
    # input the structured parsing above did not recognise.
    if "v=" in text:
        legacy_id = text.split("v=")[1].split("&")[0]
        if legacy_id:
            return legacy_id

    raise ValueError(f"Could not extract a YouTube video ID from URL: {url!r}")
|
19 |
+
|
20 |
+
def download_youtube_video(url, output_path):
    """Download *url* with yt-dlp and return the resulting file name.

    The format selector asks for MP4 video plus M4A audio first, then a
    single MP4 file, then whatever yt-dlp considers best. The file is named
    ``<id>.<ext>`` inside *output_path*.

    Args:
        url: URL handed to ``YoutubeDL.extract_info``.
        output_path: Directory used as the prefix of the output template.

    Returns:
        The file name computed by ``YoutubeDL.prepare_filename`` for the
        extracted info dict.
    """
    name_template = os.path.join(output_path, '%(id)s.%(ext)s')
    options = {
        'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best',
        'outtmpl': name_template,
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        metadata = downloader.extract_info(url, download=True)
        return downloader.prepare_filename(metadata)
|
29 |
+
|
30 |
+
def process_video(video_url, output_dir, video_id):
    """Download a video and sample roughly one frame per second from it.

    This is still a placeholder for the full pipeline (face detection, object
    detection, transcription, ...); for now it only downloads the video and
    extracts frames.

    Args:
        video_url: URL passed to ``download_youtube_video``.
        output_dir: Directory the video is downloaded into.
        video_id: Currently unused; kept so existing callers keep working.

    Returns:
        A dict with the keys ``'video_path'``, ``'frames'`` (frames exactly as
        returned by ``cv2.VideoCapture.read``), ``'frame_times'`` (seconds,
        computed as frame index / fps), ``'duration'`` (seconds) and ``'fps'``.

    Raises:
        RuntimeError: If the downloaded file cannot be opened or reports a
            non-positive frame rate.
    """
    video_path = download_youtube_video(video_url, output_dir)

    video = cv2.VideoCapture(video_path)
    try:
        if not video.isOpened():
            raise RuntimeError(f"Could not open downloaded video: {video_path}")

        fps = video.get(cv2.CAP_PROP_FPS)
        # "not fps > 0" also rejects NaN; a zero fps would otherwise surface
        # as a ZeroDivisionError below.
        if not fps > 0:
            raise RuntimeError(f"Video reports an invalid frame rate: {fps!r}")

        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        duration = frame_count / fps

        # One frame per second. int(fps) is 0 for fps < 1, which range()
        # rejects as a step, so fall back to sampling every frame.
        step = max(1, int(fps))

        frames = []
        frame_times = []
        for index in range(0, frame_count, step):
            video.set(cv2.CAP_PROP_POS_FRAMES, index)
            ret, frame = video.read()
            if ret:
                frames.append(frame)
                frame_times.append(index / fps)
    finally:
        # Release the capture even when reading fails part-way through.
        video.release()

    return {
        'video_path': video_path,
        'frames': frames,
        'frame_times': frame_times,
        'duration': duration,
        'fps': fps,
    }
|
60 |
+
|
61 |
+
def search(query, index_path, metadata_path, model):
    """Stub for the similarity search; always returns two empty lists.

    None of the arguments is used yet. The eventual implementation is meant
    to run a FAISS similarity search.

    Returns:
        A 2-tuple of fresh empty lists.
    """
    # TODO: implement the FAISS-backed lookup.
    first, second = [], []
    return first, second
|
65 |
+
|
66 |
+
# Load models
|
67 |
+
@st.cache_resource
def load_models():
    """Load the models used by the app, once per Streamlit server process.

    Uses CUDA when ``torch.cuda.is_available()`` and the CPU otherwise.

    Returns:
        A tuple ``(clip_model, preprocess, text_model, qa_model)``:
        the CLIP model and its preprocessing transform loaded through
        ``torch.hub``, the ``all-MiniLM-L6-v2`` sentence-transformer, and a
        question-answering pipeline backed by
        ``distilbert-base-cased-distilled-squad``.
    """
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')

    # The openai/CLIP hubconf exposes one entrypoint per model, named after
    # the model with punctuation replaced by underscores ("ViT-B/32" ->
    # "ViT_B_32"); 'clip_vit_b32' is not among them, so loading it fails.
    # NOTE(review): confirm against hubconf.py of the CLIP revision in use.
    clip_model, preprocess = torch.hub.load('openai/CLIP', 'ViT_B_32', device=device)

    text_model = SentenceTransformer("all-MiniLM-L6-v2").to(device)

    # transformers pipelines take a device index: 0 = first GPU, -1 = CPU.
    qa_model = pipeline(
        "question-answering",
        model="distilbert-base-cased-distilled-squad",
        device=0 if use_cuda else -1,
    )
    return clip_model, preprocess, text_model, qa_model
|
74 |
+
|
75 |
+
clip_model, preprocess, text_model, qa_model = load_models()

# Streamlit UI
st.title("Enhanced YouTube Video Analysis")

video_url = st.text_input("Enter YouTube Video URL")

# st.button() is True only during the rerun triggered by the click, and every
# interaction with a widget below triggers another rerun. The processed video
# is therefore kept in session state so the search widgets remain usable
# after "Analyze" has been clicked.
if st.button("Analyze"):
    if "results" in st.session_state:
        del st.session_state["results"]
    if video_url.strip():
        try:
            with st.spinner("Processing video..."):
                video_id = get_video_id(video_url)
                st.session_state["results"] = process_video(video_url, "output_dir", video_id)
        except Exception as exc:  # UI boundary: report the failure to the user
            st.error(f"Could not process the video: {exc}")

results = st.session_state["results"] if "results" in st.session_state else None

if results:
    st.success("Video processed successfully!")

    # Text search and question answering
    st.subheader("Text Search and Q&A")
    query = st.text_input("Enter a search query or question")
    if query:
        # Placeholder for text search and QA
        st.write("Text search and QA functionality to be implemented")

    # Image upload and similarity search
    st.subheader("Image Search")
    uploaded_image = st.file_uploader("Upload an image to find similar frames", type=["jpg", "jpeg", "png"])
    if uploaded_image:
        # Placeholder for image search
        st.write("Image search functionality to be implemented")

    # Face upload and recognition
    st.subheader("Face Search")
    uploaded_face = st.file_uploader("Upload a face image to find appearances", type=["jpg", "jpeg", "png"])
    if uploaded_face:
        face_image = face_recognition.load_image_file(uploaded_face)
        reference_encodings = face_recognition.face_encodings(face_image)

        if not reference_encodings:
            # Indexing [0] on an empty list would raise IndexError.
            st.warning("No face could be detected in the uploaded image.")
        else:
            face_encoding = reference_encodings[0]

            face_appearances = []
            face_frames = []

            for i, frame in enumerate(results['frames']):
                # OpenCV decodes frames as BGR; face_recognition expects RGB.
                rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                face_locations = face_recognition.face_locations(rgb_frame)
                face_encodings = face_recognition.face_encodings(rgb_frame, face_locations)

                # Record each frame at most once, even if several detected
                # faces match the reference encoding.
                if any(face_recognition.compare_faces(face_encodings, face_encoding)):
                    face_appearances.append(results['frame_times'][i])
                    face_frames.append(frame)

            st.write(f"Face appearances found at {len(face_appearances)} timestamps.")

            if face_frames:
                output_video = "face_appearances.mp4"
                try:
                    # Create a temporary directory to store frames
                    with tempfile.TemporaryDirectory() as temp_dir:
                        # Save frames as images (cv2.imwrite expects BGR, so
                        # the original frames are written unchanged)
                        for i, frame in enumerate(face_frames):
                            cv2.imwrite(os.path.join(temp_dir, f"frame_{i:04d}.jpg"), frame)

                        # Use FFmpeg to create a video from the frames
                        ffmpeg_command = [
                            "ffmpeg",
                            "-y",  # overwrite the file left by a previous search
                            "-framerate", str(results['fps']),
                            "-i", os.path.join(temp_dir, "frame_%04d.jpg"),
                            "-c:v", "libx264",
                            "-pix_fmt", "yuv420p",
                            output_video,
                        ]
                        subprocess.run(ffmpeg_command, check=True)
                except (OSError, subprocess.CalledProcessError) as exc:
                    # OSError covers a missing ffmpeg binary.
                    st.error(f"Could not build the face appearances video: {exc}")
                else:
                    # Display the generated video
                    st.video(output_video)

                    # Provide download link for the video
                    with open(output_video, "rb") as file:
                        st.download_button(
                            label="Download Face Appearances Video",
                            data=file,
                            file_name="face_appearances.mp4",
                            mime="video/mp4",
                        )
            else:
                st.write("No frames with the uploaded face were found in the video.")

    # Display original video
    st.subheader("Original Video")
    st.video(results['video_path'])

else:
    st.warning("Please enter a valid YouTube URL and click 'Analyze'")
|