Spaces:

lorocksUMD
/

DenseAV-Lowell

Running

lorocksUMD committed on Apr 1

Commit

a3e1fdc

verified ·

1 Parent(s): b17997f

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -74,12 +74,16 @@ video_output2 = gr.Video(label="Multi-Head Audio Video Attention (Only Availible
                          height=480)
 video_output3 = gr.Video(label="Visual Features", height=480)
-models = {o: LitAVAligner.from_pretrained(f"mhamilton723/DenseAV-{o}") for o in ['language', "sound-language", "sound"]}
 def process_video(video, model_option):
     # model = models[model_option].cuda()
     model = models[model_option]
-    print(model_option)
     original_frames, audio, info = torchvision.io.read_video(video, end_pts=10, pts_unit='sec')
     sample_rate = 16000

                          height=480)
 video_output3 = gr.Video(label="Visual Features", height=480)
+models = {
+    'language': LitAVAligner.from_pretrained(f"mhamilton723/DenseAV-language"),
+    'sound_and_language': LitAVAligner.from_pretrained(f"mhamilton723/DenseAV-sound-language"),
+    'sound': LitAVAligner.from_pretrained(f"mhamilton723/DenseAV-sound")
+}
 def process_video(video, model_option):
     # model = models[model_option].cuda()
     model = models[model_option]
     original_frames, audio, info = torchvision.io.read_video(video, end_pts=10, pts_unit='sec')
     sample_rate = 16000