Spaces:

Jiaqi-hkust
/

hawk

Runtime error

Jiaqi-hkust committed on Feb 24

Commit

671e9b3

verified ·

1 Parent(s): 95b4753

Update hawk/conversation/conversation_video.py

Files changed (1) hide show

hawk/conversation/conversation_video.py CHANGED Viewed

@@ -302,16 +302,16 @@ class Chat:
             video = self.vis_processor.transform(video)
             video_motion = self.vis_processor.transform(video_motion)
-            video = video.unsqueeze(0).to(self.device)
-            video_motion = video_motion.unsqueeze(0).to(self.device)
             # print(image)
         else:
             raise NotImplementedError
         # conv.system = "You can understand the video that the user provides.  Follow the instructions carefully and explain your answers in detail."
-        image_emb, _, _ = self.model.encode_videoQformer_visual(video) # 1,32,4096
-        image_motion_emb, _, _ = self.model.encode_videoQformer_visual(video_motion, motion=True) # 1,32,4096
         img_list.append(torch.cat((image_emb, image_motion_emb), dim=1))
         # img_list.append(image_motion_emb)
         conv.append_message(conv.roles[0], "<Video><ImageHere></Video> ")

             video = self.vis_processor.transform(video)
             video_motion = self.vis_processor.transform(video_motion)
+            video = video.unsqueeze(0).to(self.device).clone().detach()
+            video_motion = video_motion.unsqueeze(0).to(self.device).clone().detach()
             # print(image)
         else:
             raise NotImplementedError
         # conv.system = "You can understand the video that the user provides.  Follow the instructions carefully and explain your answers in detail."
+        image_emb, _, _ = self.model.encode_videoQformer_visual(video).clone().detach() # 1,32,4096
+        image_motion_emb, _, _ = self.model.encode_videoQformer_visual(video_motion, motion=True).clone().detach() # 1,32,4096
         img_list.append(torch.cat((image_emb, image_motion_emb), dim=1))
         # img_list.append(image_motion_emb)
         conv.append_message(conv.roles[0], "<Video><ImageHere></Video> ")