Update app.py
app.py CHANGED
@@ -5,7 +5,7 @@ from transformers import BlipProcessor, BlipForQuestionAnswering
 
 # Initialize the model and processor
 processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
-model = BlipForQuestionAnswering.from_pretrained("ManishThota/InstructBlip-VQA")
+model = BlipForQuestionAnswering.from_pretrained("ManishThota/InstructBlip-VQA").to("cuda")
 # model = BlipForQuestionAnswering.from_pretrained("Salesforce/blip-vqa-base")
 
 def predict_answer(image, question):
@@ -13,7 +13,7 @@ def predict_answer(image, question):
     image = image.convert("RGB")
 
     # Prepare inputs
-    encoding = processor(image, question, return_tensors="pt")
+    encoding = processor(image, question, return_tensors="pt").to("cuda:0", torch.float16)
 
     out = model.generate(**encoding)
     generated_text = processor.decode(out[0], skip_special_tokens=True)
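
A caveat with this commit: the inputs are cast to torch.float16, but the model is only moved to CUDA, so unless it is converted elsewhere in app.py its weights stay in float32, which typically raises a dtype-mismatch error inside generate; the new line also references torch, which must be imported somewhere in the file. A minimal sketch of a consistent half-precision setup, following the pattern on the Salesforce/blip-vqa-base model card (the explicit torch import and availability of a CUDA device are assumptions, not part of this commit):

import torch
from transformers import BlipProcessor, BlipForQuestionAnswering

processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")

# Load the weights directly in float16 so they match the half-precision
# inputs (assumption: a CUDA device is available).
model = BlipForQuestionAnswering.from_pretrained(
    "ManishThota/InstructBlip-VQA", torch_dtype=torch.float16
).to("cuda")

def predict_answer(image, question):
    image = image.convert("RGB")
    # In recent transformers versions, BatchFeature.to() casts only the
    # floating-point tensors (pixel_values) to float16 and moves everything
    # to the GPU; the integer token ids are left untouched.
    encoding = processor(image, question, return_tensors="pt").to("cuda", torch.float16)
    out = model.generate(**encoding)
    return processor.decode(out[0], skip_special_tokens=True)

With this setup the model weights and pixel values share one dtype and one device, so generate runs without the runtime cast error the committed version would likely hit.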