Update app.py
Browse files
app.py
CHANGED
@@ -422,18 +422,18 @@ def inference(image, audio, video, task_type, instruction):
|
|
422 |
instruction = 'which region does the text " {} " describe?'.format(instruction)
|
423 |
transform = refcoco_transform
|
424 |
cfg = refcoco_cfg
|
425 |
-
elif task_type == 'General':
|
426 |
task = general_task
|
427 |
models = general_models
|
428 |
generator = general_generator
|
429 |
transform = general_transform
|
430 |
cfg = general_cfg
|
431 |
-
elif task_type == 'General Video':
|
432 |
-
|
433 |
-
|
434 |
-
|
435 |
-
|
436 |
-
|
437 |
else:
|
438 |
raise NotImplementedError
|
439 |
|
@@ -476,7 +476,13 @@ examples = [
|
|
476 |
['examples/images/ski.jpg', None, None, 'Visual Question Answering', 'what does the woman wearing black do?'],
|
477 |
['examples/images/banana.jpg', None, None, 'Visual Grounding', 'the detached banana'],
|
478 |
['examples/images/skateboard.jpg', None, None, 'General', 'which region does the text " a yellow bird " describe?'],
|
479 |
-
['examples/images/baseball.jpg', None, None, 'General', 'what color is the left car?']
|
|
|
|
|
|
|
|
|
|
|
|
|
480 |
]
|
481 |
|
482 |
title = "UnIVAL"
|
|
|
422 |
instruction = 'which region does the text " {} " describe?'.format(instruction)
|
423 |
transform = refcoco_transform
|
424 |
cfg = refcoco_cfg
|
425 |
+
elif task_type in ['General', 'General Video']:
|
426 |
task = general_task
|
427 |
models = general_models
|
428 |
generator = general_generator
|
429 |
transform = general_transform
|
430 |
cfg = general_cfg
|
431 |
+
# elif task_type == 'General Video':
|
432 |
+
# task = general_task
|
433 |
+
# models = video_general_models
|
434 |
+
# generator = video_general_generator
|
435 |
+
# transform = general_transform
|
436 |
+
# cfg = video_general_cfg
|
437 |
else:
|
438 |
raise NotImplementedError
|
439 |
|
|
|
476 |
['examples/images/ski.jpg', None, None, 'Visual Question Answering', 'what does the woman wearing black do?'],
|
477 |
['examples/images/banana.jpg', None, None, 'Visual Grounding', 'the detached banana'],
|
478 |
['examples/images/skateboard.jpg', None, None, 'General', 'which region does the text " a yellow bird " describe?'],
|
479 |
+
['examples/images/baseball.jpg', None, None, 'General', 'what color is the left car?'],
|
480 |
+
[None, None, 'examples/videos/video7014.mp4', 'Video Captioning', None],
|
481 |
+
[None, None, 'examples/videos/video7017.mp4', 'Video Captioning', None],
|
482 |
+
[None, None, 'examples/videos/video7019.mp4', 'Video Captioning', None],
|
483 |
+
[None, None, 'examples/videos/video7021.mp4', 'Video Captioning', None],
|
484 |
+
[None, 'examples/audios/6cS0FsUM-cQ.wav', None, 'Audio Captioning', None],
|
485 |
+
[None, 'examples/audios/AJtNitYMa1I.wav', None, 'Audio Captioning', None],
|
486 |
]
|
487 |
|
488 |
title = "UnIVAL"
|