#!/usr/bin/env python3
"""Score candidate text captions against an image with BLIP image-text matching."""
from transformers.utils import logging

# Silence transformers' info/progress chatter before the heavyweight imports run.
logging.set_verbosity_error()

from transformers import AutoProcessor, BlipForImageTextRetrieval
from PIL import Image
import math
import random
import time
import torch

# Multi-modal model: accepts both text and image content.
print("loading model ...")
model = BlipForImageTextRetrieval.from_pretrained("Salesforce/blip-itm-base-coco")
processor = AutoProcessor.from_pretrained("Salesforce/blip-itm-base-coco")

print("loading image ...")
raw_image = Image.open('./assets/pot-o-gold-my-little-pony-Derpy.jpeg').convert('RGB')

print("processing ...")
# Candidate captions to score against the image, from close matches to near-misses.
statements = [
    "an image of a horse",
    "a horse and a rainbow",
    "a pony and a rainbow",
    "a unicorn and a rainbow",
    "a pony in a forest",
    "a rainbow over a lake",  # fixed typo: was "rainbox"
    "a horse running through the forest",
    "two eyes that do not match",
    "equine joy",
    "a stallion and gold coins",
    "a mare and gold coins"
]
# Endlessly sample a caption and print its image-text match probability.
# (Demo loop — interrupt with Ctrl-C.)
while True:
    # random.choice replaces the hand-rolled math.floor(random.random() * len(...)).
    text = random.choice(statements)
    inputs = processor(images=raw_image,
                       text=text,
                       return_tensors="pt")  # PyTorch tensors
    # Inference only — disable autograd to avoid building a gradient graph.
    with torch.no_grad():
        itm_logits = model(**inputs)[0]
    # Softmax over the two ITM classes; index 1 is the "text matches image" probability.
    itm_score = torch.nn.functional.softmax(itm_logits, dim=1)
    print(f"'{text}' => {itm_score[0][1]:.2f}")