chuanenlin committed
Commit df75121 · 1 Parent(s): 6792725

Demo version
- .DS_Store +0 -0
- whichframe.py +26 -69
- youtube.com_cookies.txt +0 -5
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
whichframe.py
CHANGED
@@ -17,39 +17,26 @@ CACHED_DATA_PATH = "cached_data/"
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model, preprocess = openai_clip.load("ViT-B/32", device=device)

-def export_cookies():
-    cookie_path = "youtube.com_cookies.txt"
-    if not os.path.exists(cookie_path):
-        with open(cookie_path, "w") as f:
-            f.write("# Netscape HTTP Cookie File\n")
-            f.write("# https://curl.haxx.se/rfc/cookie_spec.html\n")
-            f.write("# This is a generated file! Do not edit.\n\n")
-            f.write(".youtube.com\tTRUE\t/\tFALSE\t2147483647\tCONSENT\tYES+cb.20210328-17-p0.en+FX+{}\n")
-    return cookie_path
-
 def fetch_video(url):
+    if url != EXAMPLE_URL:
+        st.error("Only the example video is supported due to compute constraints.")
+        st.stop()
+
     try:
-        cookie_path = export_cookies()
         ydl_opts = {
             'format': 'bestvideo[height<=360][ext=mp4]/best[height<=360]',
             'quiet': True,
             'no_warnings': True,
-            'cookies': cookie_path,
             'extract_flat': False,
             'no_check_certificates': True
         }
         with yt_dlp.YoutubeDL(ydl_opts) as ydl:
             info = ydl.extract_info(url, download=False)
-            duration = info.get('duration', 0)
-            if duration >= 300:  # 5 minutes
-                st.error("Please find a YouTube video shorter than 5 minutes.")
-                st.stop()
             video_url = info['url']
             return None, video_url

     except Exception as e:
         st.error(f"Error fetching video: {str(e)}")
-        st.error("Try another YouTube video or check if the URL is correct.")
         st.stop()

 def extract_frames(video, status_text, progress_bar):
@@ -242,16 +229,28 @@ a:hover {text-decoration: none;}
 """
 st.markdown(hide_streamlit_style, unsafe_allow_html=True)

-
-
-if 'video_frames' not in st.session_state:
+# Initialize session state
+if 'initialized' not in st.session_state:
+    st.session_state.initialized = False
     st.session_state.video_frames = None
-if 'video_features' not in st.session_state:
     st.session_state.video_features = None
-if 'fps' not in st.session_state:
     st.session_state.fps = None
-
-    st.session_state.
+    st.session_state.frame_indices = None
+    st.session_state.url = EXAMPLE_URL
+
+# Load data on first run
+if not st.session_state.initialized:
+    cached_frames, cached_features, cached_fps, cached_frame_indices = load_cached_data(EXAMPLE_URL)
+
+    if cached_frames is not None:
+        st.session_state.video_frames = cached_frames
+        st.session_state.video_features = cached_features
+        st.session_state.fps = cached_fps
+        st.session_state.frame_indices = cached_frame_indices
+        st.session_state.initialized = True
+    else:
+        st.error("Could not load video data. Please contact the administrator.")
+        st.stop()

 st.title("Which Frame? 🎞️🔍")
 st.markdown("""
@@ -259,52 +258,10 @@ Search a video semantically. For example, which frame has "a person with sunglasses
 Search using text, images, or a mix of text + image. WhichFrame uses [CLIP](https://github.com/openai/CLIP) for zero-shot frame classification.
 """)

-
-
-
-url = st.text_input("Enter a YouTube URL (e.g., https://www.youtube.com/watch?v=zTvJJnoWIPk)", key="url_input")
-
-if st.button("Process Video"):
-    if not url:
-        st.error("Please enter a YouTube URL first")
-    else:
-        try:
-            cached_frames, cached_features, cached_fps, cached_frame_indices = load_cached_data(url)
-
-            if cached_frames is not None:
-                st.session_state.video_frames = cached_frames
-                st.session_state.video_features = cached_features
-                st.session_state.fps = cached_fps
-                st.session_state.frame_indices = cached_frame_indices
-                st.session_state.url = url
-                st.session_state.progress = 2
-                st.success("Loaded cached video data!")
-            else:
-                with st.spinner('Fetching video...'):
-                    video, video_url = fetch_video(url)
-                    st.session_state.url = url
-
-                progress_bar = st.progress(0)
-                status_text = st.empty()
-
-                # Extract frames
-                st.session_state.video_frames, st.session_state.fps, st.session_state.frame_indices = extract_frames(video_url, status_text, progress_bar)
-
-                # Encode frames
-                st.session_state.video_features = encode_frames(st.session_state.video_frames, status_text)
-
-                save_cached_data(url, st.session_state.video_frames, st.session_state.video_features, st.session_state.fps, st.session_state.frame_indices)
-                status_text.text('Finalizing...')
-                st.session_state.progress = 2
-                progress_bar.progress(100)
-                status_text.empty()
-                progress_bar.empty()
-                st.success("Video processed successfully!")
-
-        except Exception as e:
-            st.error(f"Error processing video: {str(e)}")
+st.video(EXAMPLE_URL)
+st.caption("Note: Try out the code linked at the bottom of the page to run WhichFrame on your own videos.")

-if st.session_state.
+if st.session_state.initialized:
     search_type = st.radio("Search Method", ["Text Search", "Image Search", "Text + Image Search"], index=0)

     if search_type == "Text Search": # Text Search
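As context for the search UI this diff ends on: below is a minimal sketch (not part of this commit) of how a "Text Search" query can be scored against the cached CLIP frame features. The helper name search_frames_by_text and the assumption that video_features holds L2-normalized ViT-B/32 frame embeddings are illustrative, not code from whichframe.py.

# Illustrative sketch only: rank cached CLIP frame embeddings against a text query.
# Assumes `video_features` is an (N, 512) tensor of L2-normalized frame embeddings
# (as an encode_frames-style helper would produce) and that the OpenAI `clip`
# package is installed and imported as openai_clip, matching whichframe.py.
import torch
import clip as openai_clip

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = openai_clip.load("ViT-B/32", device=device)

def search_frames_by_text(query, video_features, top_k=5):
    # Encode the text query with CLIP and L2-normalize it.
    with torch.no_grad():
        text_features = model.encode_text(openai_clip.tokenize([query]).to(device)).float()
        text_features /= text_features.norm(dim=-1, keepdim=True)
    # Cosine similarity of every frame embedding against the query; highest scores first.
    similarities = (video_features.to(device).float() @ text_features.T).squeeze(1)
    best = similarities.topk(min(top_k, similarities.shape[0]))
    return best.indices.tolist(), best.values.tolist()

Usage would be along the lines of: indices, scores = search_frames_by_text("a person with sunglasses", st.session_state.video_features).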
youtube.com_cookies.txt
DELETED
@@ -1,5 +0,0 @@
-# Netscape HTTP Cookie File
-# https://curl.haxx.se/rfc/cookie_spec.html
-# This is a generated file! Do not edit.
-
-.youtube.com	TRUE	/	FALSE	2147483647	CONSENT	YES+cb.20210328-17-p0.en+FX+{}
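A side note on the cookie plumbing removed in this commit: if cookies are ever needed again, yt-dlp's Python API reads a Netscape-format file through the cookiefile option (the CLI's --cookies flag maps to it); the removed 'cookies': cookie_path key is not a documented option and was likely ignored. A minimal sketch, reusing the file path and example URL from this repo:

# Sketch only: passing a Netscape-format cookie file to yt-dlp's Python API.
import yt_dlp

ydl_opts = {
    "format": "bestvideo[height<=360][ext=mp4]/best[height<=360]",
    "quiet": True,
    "cookiefile": "youtube.com_cookies.txt",  # Netscape cookie file, like the one deleted here
}

with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    info = ydl.extract_info("https://www.youtube.com/watch?v=zTvJJnoWIPk", download=False)
    print(info.get("title"), info.get("duration"))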