|
from transformers import ViltProcessor, ViltForQuestionAnswering
|
|
from transformers import BlipProcessor, BlipForQuestionAnswering
|
|
import requests
|
|
from PIL import Image
|
|
import json, os, csv
|
|
import logging
|
|
from tqdm import tqdm
|
|
import torch
|
|
|
|
|
|
# Root directory of the test set. Every sample lives in its own sub-folder,
# from which this script reads "data.json" (question + id) and "image.png".
test_data_dir = "Data/test_data/test_data"

# Use the GPU when one is present; otherwise fall back to the CPU instead of
# crashing on a hard-coded "cuda" device.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The processor is taken from the public base checkpoint, the model weights
# from the locally saved directory.
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained("Model/blip-saved-model").to(device)

# Collected (image id, generated answer) pairs, one per processed sample.
results = []

# List the directory once. Sorting makes the output row order reproducible,
# since os.listdir() returns entries in arbitrary order.
samples = sorted(os.listdir(test_data_dir))

for filename in tqdm(samples, desc="Processing"):
    # Build the sample path from test_data_dir so the JSON is read from the
    # same tree that was listed above (and that the image path below uses).
    sample_path = os.path.join(test_data_dir, filename)
    if not os.path.isdir(sample_path):
        # Stray files (e.g. hidden OS metadata) are not samples.
        logging.warning("Skipping non-directory entry: %s", sample_path)
        continue

    json_path = os.path.join(sample_path, "data.json")
    with open(json_path, "r", encoding="utf-8") as json_file:
        data = json.load(json_file)
    question = data["question"]
    image_id = data["id"]

    # The image folder is addressed by the id stored in the JSON.
    image_path = os.path.join(test_data_dir, f"{image_id}", "image.png")
    with Image.open(image_path) as raw_image:
        # convert() returns a fully loaded copy, so the file can be closed.
        image = raw_image.convert("RGB")

    # Move the inputs to the model's own device and floating-point dtype.
    # Casting them to float16 while the model holds float32 weights raises a
    # dtype-mismatch error inside the vision encoder.
    encoding = processor(image, question, return_tensors="pt").to(
        model.device, model.dtype
    )

    out = model.generate(**encoding)
    generated_text = processor.decode(out[0], skip_special_tokens=True)

    results.append((image_id, generated_text))
|
|
|
|
|
|
# Destination of the prediction table: a header row followed by one
# (ID, Label) row per entry in `results`.
csv_file_path = "Results/results.csv"

# Create the output directory up front so the predictions are not lost to a
# FileNotFoundError when "Results/" does not exist yet.
os.makedirs(os.path.dirname(csv_file_path), exist_ok=True)

# newline="" lets the csv module control line endings itself.
with open(csv_file_path, mode="w", newline="", encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(["ID", "Label"])
    csv_writer.writerows(results)

print(f"Results saved to {csv_file_path}")