ManishThota committed on
Commit 9b12d4a · verified · 1 Parent(s): 544cd28

Update app.py

Files changed (1)
  1. app.py +42 -2
app.py CHANGED
@@ -1,3 +1,43 @@
- import gradio as gr
-
- gr.load("models/ManishThota/InstructBlip-VQA").launch()
+ import gradio as gr
+
+ # gr.load("models/ManishThota/InstructBlip-VQA").launch()
+
+
+ from PIL import Image
+ import torch
+ from transformers import BlipProcessor, BlipForQuestionAnswering
+
+ # Initialize the model and processor
+ processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
+ # Pick the device up front: hard-coding "cuda:0" would crash on CPU-only hardware
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+ model = BlipForQuestionAnswering.from_pretrained("ManishThota/InstructBlip-VQA").to(device)
+
+ def predict_answer(image, question):
+     # Convert the PIL image to RGB if it is not already
+     image = image.convert("RGB")
+
+     # Prepare inputs on the same device (and default dtype) as the model
+     encoding = processor(image, question, return_tensors="pt").to(device)
+
+     out = model.generate(**encoding)
+     generated_text = processor.decode(out[0], skip_special_tokens=True)
+
+     return generated_text
+
+
+ def gradio_predict(image, question):
+     return predict_answer(image, question)
+
+ # Define the Gradio interface (gr.inputs/gr.outputs were removed in Gradio 3)
+ iface = gr.Interface(
+     fn=gradio_predict,
+     inputs=[gr.Image(type="pil"), gr.Textbox(label="Question")],
+     outputs=gr.Textbox(label="Answer"),
+     title="Visual Question Answering",
+     description="This model answers questions based on the content of an image. Powered by BLIP.",
+ )
+
+ # Launch the app
+ iface.launch()
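
For reference, a minimal standalone sketch of the same inference path, handy for checking the model outside Gradio. It assumes the same checkpoints as app.py; the file name sanity_check.py and the path "example.jpg" are hypothetical placeholders.

    # sanity_check.py - hypothetical standalone test of the VQA inference path
    import torch
    from PIL import Image
    from transformers import BlipProcessor, BlipForQuestionAnswering

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    model = BlipForQuestionAnswering.from_pretrained("ManishThota/InstructBlip-VQA").to(device)

    # "example.jpg" is a placeholder; substitute any local image
    image = Image.open("example.jpg").convert("RGB")
    encoding = processor(image, "What is in this picture?", return_tensors="pt").to(device)
    out = model.generate(**encoding)
    print(processor.decode(out[0], skip_special_tokens=True))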