Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -74,12 +74,16 @@ video_output2 = gr.Video(label="Multi-Head Audio Video Attention (Only Availible
|
|
74 |
height=480)
|
75 |
video_output3 = gr.Video(label="Visual Features", height=480)
|
76 |
|
77 |
-
models = {
|
|
|
|
|
|
|
|
|
|
|
78 |
|
79 |
def process_video(video, model_option):
|
80 |
# model = models[model_option].cuda()
|
81 |
model = models[model_option]
|
82 |
-
print(model_option)
|
83 |
|
84 |
original_frames, audio, info = torchvision.io.read_video(video, end_pts=10, pts_unit='sec')
|
85 |
sample_rate = 16000
|
|
|
74 |
height=480)
|
75 |
video_output3 = gr.Video(label="Visual Features", height=480)
|
76 |
|
77 |
+
models = {
|
78 |
+
'language': LitAVAligner.from_pretrained(f"mhamilton723/DenseAV-language"),
|
79 |
+
'sound_and_language': LitAVAligner.from_pretrained(f"mhamilton723/DenseAV-sound-language"),
|
80 |
+
'sound': LitAVAligner.from_pretrained(f"mhamilton723/DenseAV-sound")
|
81 |
+
}
|
82 |
+
|
83 |
|
84 |
def process_video(video, model_option):
|
85 |
# model = models[model_option].cuda()
|
86 |
model = models[model_option]
|
|
|
87 |
|
88 |
original_frames, audio, info = torchvision.io.read_video(video, end_pts=10, pts_unit='sec')
|
89 |
sample_rate = 16000
|