Irpan committed on
Commit
72a284b
·
1 Parent(s): 7c308d7
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -86,8 +86,8 @@ torch.hub.download_url_to_file('https://img.freepik.com/premium-photo/man-holds-
86
  vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
87
  vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
88
 
89
- flan_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
90
- flan_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
91
 
92
 
93
 
@@ -98,14 +98,14 @@ def main(image, text):
98
  vqa_answer = vqa(image, cleaned_sentence)
99
  llm_answer = llm(cleaned_sentence, vqa_answer)
100
  final_answer, _ = google_translate(llm_answer, dest=dest_lang)
101
- return final_answer
102
 
103
 
104
  image = gr.Image(type="pil")
105
  question = gr.Textbox(label="Question")
106
  answer = gr.Textbox(label="Predicted answer")
107
  examples = [
108
- ["monkeys.jpg", "What are they doing in French?"],
109
  ["apple.jpg", "Qu'est-ce que c'est dans ma main en anglais?"]
110
  ]
111
 
 
86
  vqa_processor = ViltProcessor.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
87
  vqa_model = ViltForQuestionAnswering.from_pretrained("dandelin/vilt-b32-finetuned-vqa")
88
 
89
+ flan_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
90
+ flan_model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-large")
91
 
92
 
93
 
 
98
  vqa_answer = vqa(image, cleaned_sentence)
99
  llm_answer = llm(cleaned_sentence, vqa_answer)
100
  final_answer, _ = google_translate(llm_answer, dest=dest_lang)
101
+ return vqa_answer, final_answer
102
 
103
 
104
  image = gr.Image(type="pil")
105
  question = gr.Textbox(label="Question")
106
  answer = gr.Textbox(label="Predicted answer")
107
  examples = [
108
+ ["monkeys.jpg", "What are the monkeys doing in French?"],
109
  ["apple.jpg", "Qu'est-ce que c'est dans ma main en anglais?"]
110
  ]
111