Irpan committed on
Commit
7c308d7
·
1 Parent(s): 190650c
Files changed (1) hide show
  1. app.py +12 -3
app.py CHANGED
@@ -81,6 +81,7 @@ def llm(cleaned_sentence, vqa_answer):
81
 
82
 
83
  torch.hub.download_url_to_file('https://media.istockphoto.com/id/1174602891/photo/two-monkeys-mom-and-cub-eat-bananas.jpg?s=612x612&w=0&k=20&c=r7VXi9d1wHhyq3iAk9D2Z3yTZiOJMlLNtjdVRBEjG7g=', 'monkeys.jpg')
 
84
 
85
  vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
86
  vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
@@ -103,15 +104,23 @@ def main(image, text):
103
  image = gr.Image(type="pil")
104
  question = gr.Textbox(label="Question")
105
  answer = gr.Textbox(label="Predicted answer")
106
- examples = [["monkeys.jpg", "How many monkeys are there, in French?"]]
 
 
 
107
 
108
  title = "Cross-lingual VQA"
109
- description = "Visual Question Answering (VQA) across langages"
 
 
 
 
110
 
111
  interface = gr.Interface(fn=main,
112
  inputs=[image, question],
113
  outputs=answer,
114
  examples=examples,
115
  title=title,
116
- description=description)
 
117
  interface.launch(debug=True)
 
81
 
82
 
83
  torch.hub.download_url_to_file('https://media.istockphoto.com/id/1174602891/photo/two-monkeys-mom-and-cub-eat-bananas.jpg?s=612x612&w=0&k=20&c=r7VXi9d1wHhyq3iAk9D2Z3yTZiOJMlLNtjdVRBEjG7g=', 'monkeys.jpg')
84
+ torch.hub.download_url_to_file('https://img.freepik.com/premium-photo/man-holds-apple-his-hands_198067-740023.jpg', 'apple.jpg')
85
 
86
  vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
87
  vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
 
104
  image = gr.Image(type="pil")
105
  question = gr.Textbox(label="Question")
106
  answer = gr.Textbox(label="Predicted answer")
107
+ examples = [
108
+ ["monkeys.jpg", "What are they doing in French?"],
109
+ ["apple.jpg", "Qu'est-ce que c'est dans ma main en anglais?"]
110
+ ]
111
 
112
  title = "Cross-lingual VQA"
113
+ description = "Visual Question Answering (VQA) across Languages"
114
+ article = """
115
+ Supports questions regarding the following languages:
116
+ ['afrikaans', 'albanian', 'amharic', 'arabic', 'armenian', 'azerbaijani', 'basque', 'belarusian', 'bengali', 'bosnian', 'bulgarian', 'catalan', 'cebuano', 'chichewa', 'chinese (simplified)', 'chinese (traditional)', 'corsican', 'croatian', 'czech', 'danish', 'dutch', 'english', 'esperanto', 'estonian', 'filipino', 'finnish', 'french', 'frisian', 'galician', 'georgian', 'german', 'greek', 'gujarati', 'haitian creole', 'hausa', 'hawaiian', 'hebrew', 'hebrew', 'hindi', 'hmong', 'hungarian', 'icelandic', 'igbo', 'indonesian', 'irish', 'italian', 'japanese', 'javanese', 'kannada', 'kazakh', 'khmer', 'korean', 'kurdish (kurmanji)', 'kyrgyz', 'lao', 'latin', 'latvian', 'lithuanian', 'luxembourgish', 'macedonian', 'malagasy', 'malay', 'malayalam', 'maltese', 'maori', 'marathi', 'mongolian', 'myanmar (burmese)', 'nepali', 'norwegian', 'odia', 'pashto', 'persian', 'polish', 'portuguese', 'punjabi', 'romanian', 'russian', 'samoan', 'scots gaelic', 'serbian', 'sesotho', 'shona', 'sindhi', 'sinhala', 'slovak', 'slovenian', 'somali', 'spanish', 'sundanese', 'swahili', 'swedish', 'tajik', 'tamil', 'telugu', 'thai', 'turkish', 'ukrainian', 'urdu', 'uyghur', 'uzbek', 'vietnamese', 'welsh', 'xhosa', 'yiddish', 'yoruba', 'zulu']
117
+ """
118
 
119
  interface = gr.Interface(fn=main,
120
  inputs=[image, question],
121
  outputs=answer,
122
  examples=examples,
123
  title=title,
124
+ description=description,
125
+ article=article)
126
  interface.launch(debug=True)