Spaces:

ManishThota
/

InstructVQA

Paused

ManishThota committed on Feb 8, 2024

Commit

9b12d4a

verified ·

1 Parent(s): 544cd28

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,3 +1,43 @@
-import gradio as gr
-gr.load("models/ManishThota/InstructBlip-VQA").launch()

+# import gradio as gr
+# gr.load("models/ManishThota/InstructBlip-VQA").launch()
+import json
+import os
+
+import gradio as gr
+import torch
+from PIL import Image
+from transformers import BlipProcessor, BlipForQuestionAnswering
+# Initialize the model and processor
+processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
+model = BlipForQuestionAnswering.from_pretrained("ManishThota/InstructBlip-VQA")
+def predict_answer(image, question):
+    # Convert PIL image to RGB if not already
+    image = image.convert("RGB")
+    # Prepare inputs
+    encoding = processor(image, question, return_tensors="pt").to("cuda:0", torch.float16)
+    out = model.generate(**encoding)
+    generated_text = processor.decode(out[0], skip_special_tokens=True)
+    return generated_text
+def gradio_predict(image, question):
+    answer = predict_answer(image, question)
+    return answer
+# Define the Gradio interface
+iface = gr.Interface(
+    fn=gradio_predict,
+    inputs=[gr.inputs.Image(), gr.inputs.Textbox(label="Question")],
+    outputs=gr.outputs.Textbox(label="Answer"),
+    title="Visual Question Answering",
+    description="This model answers questions based on the content of an image. Powered by BLIP.",
+)
+# Launch the app
+iface.launch()