# Provenance (from the Hugging Face file viewer this script was copied from):
#   uploader: ktllc
#   commit:   30d5af0 ("Create app.py")
#   size:     1.55 kB
import numpy as np
import clip
import torch
import gradio as gr
from PIL import Image
import os
import base64
from io import BytesIO
# Choose the compute device: GPU when torch reports CUDA support, else CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the CLIP ViT-B/32 weights together with the matching image
# preprocessing pipeline, then place the model on the chosen device and
# switch it to inference mode.
model, preprocess = clip.load("ViT-B/32")
model.to(device)
model.eval()

# Business listing name (not referenced in the visible part of this file).
Business_Listing = "Air Guide"
def find_similarity(image_base64, text_input):
    """Return the cosine similarity between a base64-encoded image and a text.

    Args:
        image_base64: The image file contents encoded as base64. When given as
            a string, surrounding whitespace is ignored and a leading
            ``data:<mime>;base64,`` data-URI header is stripped.
        text_input: The text to compare against the image.

    Returns:
        The cosine similarity of the CLIP image embedding and the CLIP text
        embedding, as a plain Python ``float``.

    Raises:
        ValueError: If ``image_base64`` is empty (after stripping), or if it
            is not decodable base64 (``binascii.Error`` is a ``ValueError``
            subclass).
    """
    payload = image_base64
    if isinstance(payload, str):
        payload = payload.strip()
        # b64decode silently drops characters outside the base64 alphabet, so
        # a data-URI header would be decoded as if it were image data. Cut it
        # off explicitly instead.
        if payload.startswith("data:"):
            payload = payload.partition(",")[2].strip()
    if not payload:
        # Fail early with a clear message; otherwise the empty byte string
        # only fails later inside the image decoder.
        raise ValueError("image_base64 is empty; expected a base64-encoded image")

    # Decode the base64 image string to bytes
    image_bytes = base64.b64decode(payload)

    # Preprocess the image. The resulting tensor does not depend on the PIL
    # handle, so the handle can be released as soon as preprocessing is done.
    with Image.open(BytesIO(image_bytes)) as pil_image:
        image = preprocess(pil_image).unsqueeze(0).to(device)

    # Prepare input text
    text_tokens = clip.tokenize([text_input]).to(device)

    # Encode image and text features
    with torch.no_grad():
        image_features = model.encode_image(image).float()
        text_features = model.encode_text(text_tokens).float()

    # Normalize features so the dot product below is the cosine similarity
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    # One image and one text give a 1x1 result; .item() extracts it as a
    # built-in float rather than a numpy scalar.
    return (text_features @ image_features.T).item()
# Define a Gradio interface.
# find_similarity takes exactly two positional arguments, so exactly two input
# components are declared, in the same order as its parameters:
#   1. the base64-encoded image  -> image_base64
#   2. the text to compare       -> text_input
iface = gr.Interface(
    fn=find_similarity,
    inputs=[
        gr.inputs.Textbox(lines=3, label="Enter Base64 Image"),
        "text",
    ],
    outputs="number",
    live=True,
    interpretation="default",
    title="CLIP Model Image-Text Cosine Similarity",
    description="Enter a base64-encoded image and text to find their cosine similarity.",
)
iface.launch()