Spaces:
No application file
No application file
Remove cuda feature requirement
Browse files
app.py
CHANGED
@@ -76,7 +76,8 @@ if __name__ == "__main__":
|
|
76 |
|
77 |
|
78 |
def process_video(video, model_option):
|
79 |
-
model = models[model_option].cuda()
|
|
|
80 |
|
81 |
original_frames, audio, info = torchvision.io.read_video(video, end_pts=10, pts_unit='sec')
|
82 |
sample_rate = 16000
|
@@ -101,9 +102,11 @@ if __name__ == "__main__":
|
|
101 |
frames_to_plot = plotting_img_transform(original_frames.permute(0, 3, 1, 2))
|
102 |
|
103 |
with torch.no_grad():
|
104 |
-
audio_feats = model.forward_audio({"audio": audio.cuda()})
|
|
|
105 |
audio_feats = {k: v.cpu() for k, v in audio_feats.items()}
|
106 |
-
image_feats = model.forward_image({"frames": frames.unsqueeze(0).cuda()}, max_batch_size=2)
|
|
|
107 |
image_feats = {k: v.cpu() for k, v in image_feats.items()}
|
108 |
|
109 |
sim_by_head = model.sim_agg.get_pairwise_sims(
|
|
|
76 |
|
77 |
|
78 |
def process_video(video, model_option):
|
79 |
+
# model = models[model_option].cuda()
|
80 |
+
model = models[model_option]
|
81 |
|
82 |
original_frames, audio, info = torchvision.io.read_video(video, end_pts=10, pts_unit='sec')
|
83 |
sample_rate = 16000
|
|
|
102 |
frames_to_plot = plotting_img_transform(original_frames.permute(0, 3, 1, 2))
|
103 |
|
104 |
with torch.no_grad():
|
105 |
+
# audio_feats = model.forward_audio({"audio": audio.cuda()})
|
106 |
+
audio_feats = model.forward_audio({"audio": audio})
|
107 |
audio_feats = {k: v.cpu() for k, v in audio_feats.items()}
|
108 |
+
# image_feats = model.forward_image({"frames": frames.unsqueeze(0).cuda()}, max_batch_size=2)
|
109 |
+
image_feats = model.forward_image({"frames": frames.unsqueeze(0)}, max_batch_size=2)
|
110 |
image_feats = {k: v.cpu() for k, v in image_feats.items()}
|
111 |
|
112 |
sim_by_head = model.sim_agg.get_pairwise_sims(
|