|
import gradio as gr |
|
from PIL import Image |
|
import torch |
|
from transformers import CLIPProcessor, CLIPModel |
|
|
|
|
|
# Hugging Face Hub checkpoint: OpenCLIP ViT-H/14 trained on LAION-2B.
_CLIP_CHECKPOINT = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"

# Load the model weights and the matching image/text preprocessor once at startup.
model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)
|
|
|
def calculate_similarity(image, text_prompt):
    """Score how well an image matches a text prompt using CLIP embeddings.

    Args:
        image: a PIL image (Gradio passes ``type="pil"``), or ``None`` if the
            user submitted without uploading one.
        text_prompt: the text to compare against; coerced to ``str`` if needed.

    Returns:
        A human-readable sentence with the similarity as a percentage, or a
        usage hint when either input is missing.
    """
    if not isinstance(text_prompt, str):
        text_prompt = str(text_prompt)

    # Guard against missing inputs; the processor/model would otherwise raise.
    if image is None or not text_prompt.strip():
        return "Please provide both an image and a non-empty text prompt."

    inputs = processor(images=image, text=text_prompt, return_tensors="pt", padding=True)

    # Inference only — skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)

    # L2-normalize the embeddings so their cosine similarity is well-defined.
    image_features = outputs.image_embeds / outputs.image_embeds.norm(dim=-1, keepdim=True)
    text_features = outputs.text_embeds / outputs.text_embeds.norm(dim=-1, keepdim=True)
    cosine_similarity = torch.nn.functional.cosine_similarity(image_features, text_features)

    # CLIP cosine scores for matching pairs are typically well below 1.0, so the
    # raw value is stretched by an empirical factor of 3 before converting to a
    # percentage, then capped so the display never reaches 100%.
    adjusted_similarity = cosine_similarity.item() * 3 * 100
    clipped_similarity = min(adjusted_similarity, 99.99)
    formatted_similarity = f"According to OpenCLIP, the image and the text prompt are {clipped_similarity:.2f}% similar."

    return formatted_similarity
|
|
|
|
|
# Assemble the web UI: an image upload plus a text box feeding calculate_similarity.
_input_components = [
    gr.Image(type="pil", label="Upload Image", height=512),
    gr.Textbox(label="Text Prompt"),
]

iface = gr.Interface(
    fn=calculate_similarity,
    inputs=_input_components,
    outputs=gr.Text(),
    allow_flagging="never",
    title="OpenClip Cosine Similarity Calculator",
    description="Provide a text prompt and upload an image to calculate the cosine similarity.",
)

# share=True publishes a temporary public URL in addition to the local server.
iface.launch(share=True)
|
|