mshukor committed
Commit 76f9c23 · 1 Parent(s): a8280cf

Update app.py

Files changed (1):
  1. app.py +4 -4
app.py CHANGED
@@ -175,8 +175,8 @@ move2gpu(audio_caption_models, general_cfg)
 caption_generator = caption_task.build_generator(caption_models, caption_cfg.generation)
 refcoco_generator = refcoco_task.build_generator(refcoco_models, refcoco_cfg.generation)
 vqa_generator = vqa_task.build_generator(vqa_models, vqa_cfg.generation)
-# vqa_generator.zero_shot = True
-# vqa_generator.constraint_trie = None
+vqa_generator.zero_shot = True
+vqa_generator.constraint_trie = None
 general_generator = general_task.build_generator(general_models, general_cfg.generation)
 
 video_caption_generator = caption_task.build_generator(video_caption_models, video_caption_cfg.generation)
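The two lines enabled here switch the VQA generator from trie-constrained decoding, where answers must come from a closed candidate vocabulary, to open-ended zero-shot generation. Below is a minimal stand-alone sketch of what an answer-constraint trie does during decoding; AnswerTrie and its methods are hypothetical names for illustration, not code from app.py or the underlying generator.

# Hypothetical sketch of trie-constrained answer decoding (names invented,
# not the app's actual implementation).

class AnswerTrie:
    """Prefix trie over a closed set of tokenized candidate answers."""

    def __init__(self):
        self.children = {}
        self.is_answer = False

    def add(self, tokens):
        node = self
        for tok in tokens:
            node = node.children.setdefault(tok, AnswerTrie())
        node.is_answer = True

    def allowed_next(self, prefix):
        """Tokens the decoder may emit after `prefix` to stay inside the trie."""
        node = self
        for tok in prefix:
            node = node.children.get(tok)
            if node is None:
                return set()
        return set(node.children)

trie = AnswerTrie()
trie.add(["playing", "soccer"])
trie.add(["playing", "tennis"])

# Constrained mode (constraint_trie set): only in-vocabulary continuations.
print(trie.allowed_next(["playing"]))  # {'soccer', 'tennis'}

# Zero-shot mode (zero_shot=True, constraint_trie=None): no filtering step,
# so the generator can emit any free-form answer.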
@@ -469,11 +469,11 @@ def inference(image, audio, video, task_type, instruction):
     else:
         return None, tokens
 
-inputs = [gr.inputs.Image(type='pil'), gr.Audio(source="upload", type="filepath"), gr.Video(source="upload", type="filepath"), gr.inputs.Radio(choices=['Image Captioning', 'Video Captioning', 'Audio Captioning', "Visual Question Answering", "Visual Grounding", "General", "General Video"], type="value", default="Image Captioning", label="Task"), gr.inputs.Textbox(lines=1, label="Instruction")]
+inputs = [gr.inputs.Image(type='pil'), gr.Audio(source="upload", type="filepath"), gr.Video(source="upload", type="filepath"), gr.inputs.Radio(choices=['Image Captioning', 'Video Captioning', 'Audio Captioning', "Visual Grounding", "General", "General Video"], type="value", default="Image Captioning", label="Task"), gr.inputs.Textbox(lines=1, label="Instruction")]
 outputs = [gr.outputs.Image(type='pil'), 'text']
 examples = [
     ['examples/images/soccer.jpg', None, None, 'Image Captioning', None],
-    ['examples/images/woman_inblack.jpg', None, None, 'Visual Question Answering', 'what does the woman wearing black do?'],
+    # ['examples/images/woman_inblack.jpg', None, None, 'Visual Question Answering', 'what does the woman wearing black do?'],
     ['examples/images/banana.jpg', None, None, 'Visual Grounding', 'the detached banana'],
     ['examples/images/skateboard.jpg', None, None, 'General', 'which region does the text " a yellow bird " describe?'],
     ['examples/images/baseball.jpg', None, None, 'General', 'what is this sport?'],
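These inputs, outputs, and examples lists follow the deprecated Gradio 2.x API (gr.inputs.* / gr.outputs.*). The hunk does not show how they are consumed; in apps of this style they are typically passed to gr.Interface, roughly as sketched below (an assumption for orientation, not code from this commit):

import gradio as gr

# Assumed wiring, not shown in this diff: a Gradio 2.x-era Interface
# built from the lists defined above in app.py.
demo = gr.Interface(
    fn=inference,       # inference(image, audio, video, task_type, instruction)
    inputs=inputs,
    outputs=outputs,
    examples=examples,
)
demo.launch()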
 