mshukor committed on
Commit
db205a8
·
1 Parent(s): 8e6c889

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -8
app.py CHANGED
@@ -422,18 +422,18 @@ def inference(image, audio, video, task_type, instruction):
422
  instruction = 'which region does the text " {} " describe?'.format(instruction)
423
  transform = refcoco_transform
424
  cfg = refcoco_cfg
425
- elif task_type == 'General':
426
  task = general_task
427
  models = general_models
428
  generator = general_generator
429
  transform = general_transform
430
  cfg = general_cfg
431
- elif task_type == 'General Video':
432
- task = video_general_task
433
- models = video_general_models
434
- generator = video_general_generator
435
- transform = general_transform
436
- cfg = video_general_cfg
437
  else:
438
  raise NotImplementedError
439
 
@@ -476,7 +476,13 @@ examples = [
476
  ['examples/images/ski.jpg', None, None, 'Visual Question Answering', 'what does the woman wearing black do?'],
477
  ['examples/images/banana.jpg', None, None, 'Visual Grounding', 'the detached banana'],
478
  ['examples/images/skateboard.jpg', None, None, 'General', 'which region does the text " a yellow bird " describe?'],
479
- ['examples/images/baseball.jpg', None, None, 'General', 'what color is the left car?']
 
 
 
 
 
 
480
  ]
481
 
482
  title = "UnIVAL"
 
422
  instruction = 'which region does the text " {} " describe?'.format(instruction)
423
  transform = refcoco_transform
424
  cfg = refcoco_cfg
425
+ elif task_type in ['General', 'General Video']:
426
  task = general_task
427
  models = general_models
428
  generator = general_generator
429
  transform = general_transform
430
  cfg = general_cfg
431
+ # elif task_type == 'General Video':
432
+ # task = general_task
433
+ # models = video_general_models
434
+ # generator = video_general_generator
435
+ # transform = general_transform
436
+ # cfg = video_general_cfg
437
  else:
438
  raise NotImplementedError
439
 
 
476
  ['examples/images/ski.jpg', None, None, 'Visual Question Answering', 'what does the woman wearing black do?'],
477
  ['examples/images/banana.jpg', None, None, 'Visual Grounding', 'the detached banana'],
478
  ['examples/images/skateboard.jpg', None, None, 'General', 'which region does the text " a yellow bird " describe?'],
479
+ ['examples/images/baseball.jpg', None, None, 'General', 'what color is the left car?'],
480
+ [None, None, 'examples/videos/video7014.mp4', 'Video Captioning', None],
481
+ [None, None, 'examples/videos/video7017.mp4', 'Video Captioning', None],
482
+ [None, None, 'examples/videos/video7019.mp4', 'Video Captioning', None],
483
+ [None, None, 'examples/videos/video7021.mp4', 'Video Captioning', None],
484
+ [None, 'examples/audios/6cS0FsUM-cQ.wav', None, 'Audio Captioning', None],
485
+ [None, 'examples/audios/AJtNitYMa1I.wav', None, 'Audio Captioning', None],
486
  ]
487
 
488
  title = "UnIVAL"