Spaces:
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
@@ -243,6 +243,7 @@ def get_video_duration(video_path):
|
|
243 |
@torch.inference_mode()
|
244 |
@torch.no_grad()
|
245 |
def synthesize_video_with_audio(video_file, caption, cot):
|
|
|
246 |
video_path = video_file
|
247 |
if caption is None:
|
248 |
caption = ''
|
@@ -255,7 +256,6 @@ def synthesize_video_with_audio(video_file, caption, cot):
|
|
255 |
preprocesser = VGGSound(duration_sec=duration_sec)
|
256 |
data = preprocesser.sample(video_path, caption, cot)
|
257 |
|
258 |
-
yield "⏳ Extracting Features…", None
|
259 |
|
260 |
preprocessed_data = {}
|
261 |
metaclip_global_text_features, metaclip_text_features = feature_extractor.encode_text(data['caption'])
|
|
|
243 |
@torch.inference_mode()
|
244 |
@torch.no_grad()
|
245 |
def synthesize_video_with_audio(video_file, caption, cot):
|
246 |
+
yield "⏳ Extracting Features…", None
|
247 |
video_path = video_file
|
248 |
if caption is None:
|
249 |
caption = ''
|
|
|
256 |
preprocesser = VGGSound(duration_sec=duration_sec)
|
257 |
data = preprocesser.sample(video_path, caption, cot)
|
258 |
|
|
|
259 |
|
260 |
preprocessed_data = {}
|
261 |
metaclip_global_text_features, metaclip_text_features = feature_extractor.encode_text(data['caption'])
|