mshukor committed on
Commit
53fbbb9
·
1 Parent(s): 1a8aace

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -5
app.py CHANGED
@@ -169,7 +169,7 @@ move2gpu(refcoco_models, refcoco_cfg)
169
  move2gpu(vqa_models, vqa_cfg)
170
  move2gpu(general_models, general_cfg)
171
  move2gpu(video_caption_models, general_cfg)
172
- move2gpu(audio_general_models, general_cfg)
173
 
174
  # # Initialize generator
175
  caption_generator = caption_task.build_generator(caption_models, caption_cfg.generation)
@@ -198,7 +198,7 @@ pad_idx = general_task.src_dict.pad()
198
 
199
  type_transform = transforms.Lambda(lambda x: x.float().div(255.0))
200
  patch_video_resize_transform = transforms.Compose([
201
- transforms.CenterCrop(cfg.task.patch_frame_size),
202
  type_transform,
203
  transforms.Normalize(mean=mean, std=std),
204
  ])
@@ -222,8 +222,8 @@ def process_video(video_path, max_num_frames=16, num_frames=16, sample_type='ran
222
 
223
  def construct_video_sample(video_path):
224
 
225
- patch_video = process_video(video_path, max_num_frames=16, num_frames=cfg.task.num_frames, sample_type=cfg.task.sample_type,)
226
- patch_image = torch.zeros((3, cfg.task.patch_image_size, cfg.task.patch_image_size))
227
 
228
  patch_type = torch.tensor([1])
229
  patch_mask = torch.tensor([True])
@@ -279,7 +279,7 @@ def construct_audio_sample(audio_path):
279
 
280
 
281
  patch_audio = process_audio(audio_path, sample_rate=48000, max_audio_len=480000, audio_cfg=AUDIO_CFG)
282
- patch_image = torch.zeros((3, cfg.task.patch_image_size, cfg.task.patch_image_size))
283
 
284
  patch_type = torch.tensor([2])
285
  patch_mask = torch.tensor([True])
 
169
  move2gpu(vqa_models, vqa_cfg)
170
  move2gpu(general_models, general_cfg)
171
  move2gpu(video_caption_models, general_cfg)
172
+ move2gpu(audio_caption_models, general_cfg)
173
 
174
  # # Initialize generator
175
  caption_generator = caption_task.build_generator(caption_models, caption_cfg.generation)
 
198
 
199
  type_transform = transforms.Lambda(lambda x: x.float().div(255.0))
200
  patch_video_resize_transform = transforms.Compose([
201
+ transforms.CenterCrop(video_caption_cfg.task.patch_frame_size),
202
  type_transform,
203
  transforms.Normalize(mean=mean, std=std),
204
  ])
 
222
 
223
  def construct_video_sample(video_path):
224
 
225
+ patch_video = process_video(video_path, max_num_frames=16, num_frames=video_caption_cfg.task.num_frames, sample_type=video_caption_cfg.task.sample_type,)
226
+ patch_image = torch.zeros((3, video_caption_cfg.task.patch_image_size, video_caption_cfg.task.patch_image_size))
227
 
228
  patch_type = torch.tensor([1])
229
  patch_mask = torch.tensor([True])
 
279
 
280
 
281
  patch_audio = process_audio(audio_path, sample_rate=48000, max_audio_len=480000, audio_cfg=AUDIO_CFG)
282
+ patch_image = torch.zeros((3, audio_caption_cfg.task.patch_image_size, audio_caption_cfg.task.patch_image_size))
283
 
284
  patch_type = torch.tensor([2])
285
  patch_mask = torch.tensor([True])