Spaces:
Running
Running
Irpan
committed on
Commit
·
7c308d7
1
Parent(s):
190650c
app.py
CHANGED
@@ -81,6 +81,7 @@ def llm(cleaned_sentence, vqa_answer):
|
|
81 |
|
82 |
|
83 |
torch.hub.download_url_to_file('https://media.istockphoto.com/id/1174602891/photo/two-monkeys-mom-and-cub-eat-bananas.jpg?s=612x612&w=0&k=20&c=r7VXi9d1wHhyq3iAk9D2Z3yTZiOJMlLNtjdVRBEjG7g=', 'monkeys.jpg')
|
|
|
84 |
|
85 |
vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
|
86 |
vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
|
@@ -103,15 +104,23 @@ def main(image, text):
|
|
103 |
image = gr.Image(type="pil")
|
104 |
question = gr.Textbox(label="Question")
|
105 |
answer = gr.Textbox(label="Predicted answer")
|
106 |
-
examples = [
|
|
|
|
|
|
|
107 |
|
108 |
title = "Cross-lingual VQA"
|
109 |
-
description = "Visual Question Answering (VQA) across
|
|
|
|
|
|
|
|
|
110 |
|
111 |
interface = gr.Interface(fn=main,
|
112 |
inputs=[image, question],
|
113 |
outputs=answer,
|
114 |
examples=examples,
|
115 |
title=title,
|
116 |
-
description=description
|
|
|
117 |
interface.launch(debug=True)
|
|
|
81 |
|
82 |
|
83 |
torch.hub.download_url_to_file('https://media.istockphoto.com/id/1174602891/photo/two-monkeys-mom-and-cub-eat-bananas.jpg?s=612x612&w=0&k=20&c=r7VXi9d1wHhyq3iAk9D2Z3yTZiOJMlLNtjdVRBEjG7g=', 'monkeys.jpg')
|
84 |
+
torch.hub.download_url_to_file('https://img.freepik.com/premium-photo/man-holds-apple-his-hands_198067-740023.jpg', 'apple.jpg')
|
85 |
|
86 |
vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
|
87 |
vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
|
|
|
104 |
image = gr.Image(type="pil")
|
105 |
question = gr.Textbox(label="Question")
|
106 |
answer = gr.Textbox(label="Predicted answer")
|
107 |
+
examples = [
|
108 |
+
["monkeys.jpg", "What are they doing in French?"],
|
109 |
+
["apple.jpg", "Qu'est-ce que c'est dans ma main en anglais?"]
|
110 |
+
]
|
111 |
|
112 |
title = "Cross-lingual VQA"
|
113 |
+
description = "Visual Question Answering (VQA) across Langages"
|
114 |
+
article = """
|
115 |
+
Supports questions regarding the following langages:
|
116 |
+
['afrikaans', 'albanian', 'amharic', 'arabic', 'armenian', 'azerbaijani', 'basque', 'belarusian', 'bengali', 'bosnian', 'bulgarian', 'catalan', 'cebuano', 'chichewa', 'chinese (simplified)', 'chinese (traditional)', 'corsican', 'croatian', 'czech', 'danish', 'dutch', 'english', 'esperanto', 'estonian', 'filipino', 'finnish', 'french', 'frisian', 'galician', 'georgian', 'german', 'greek', 'gujarati', 'haitian creole', 'hausa', 'hawaiian', 'hebrew', 'hebrew', 'hindi', 'hmong', 'hungarian', 'icelandic', 'igbo', 'indonesian', 'irish', 'italian', 'japanese', 'javanese', 'kannada', 'kazakh', 'khmer', 'korean', 'kurdish (kurmanji)', 'kyrgyz', 'lao', 'latin', 'latvian', 'lithuanian', 'luxembourgish', 'macedonian', 'malagasy', 'malay', 'malayalam', 'maltese', 'maori', 'marathi', 'mongolian', 'myanmar (burmese)', 'nepali', 'norwegian', 'odia', 'pashto', 'persian', 'polish', 'portuguese', 'punjabi', 'romanian', 'russian', 'samoan', 'scots gaelic', 'serbian', 'sesotho', 'shona', 'sindhi', 'sinhala', 'slovak', 'slovenian', 'somali', 'spanish', 'sundanese', 'swahili', 'swedish', 'tajik', 'tamil', 'telugu', 'thai', 'turkish', 'ukrainian', 'urdu', 'uyghur', 'uzbek', 'vietnamese', 'welsh', 'xhosa', 'yiddish', 'yoruba', 'zulu']
|
117 |
+
"""
|
118 |
|
119 |
interface = gr.Interface(fn=main,
|
120 |
inputs=[image, question],
|
121 |
outputs=answer,
|
122 |
examples=examples,
|
123 |
title=title,
|
124 |
+
description=description,
|
125 |
+
article=article)
|
126 |
interface.launch(debug=True)
|