File size: 1,260 Bytes
869fcc5
f1cd54c
418bc05
 
869fcc5
f1cd54c
 
 
869fcc5
f1cd54c
 
 
 
 
 
 
 
 
 
 
 
 
418bc05
f1cd54c
418bc05
f1cd54c
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import gradio as gr
from transformers import CLIPModel, AutoTokenizer, RawImage
import torch
import torch.nn.functional as F

# Load the CLIP model and tokenizer
model = CLIPModel.from_pretrained("Xenova/mobileclip_blt")
tokenizer = AutoTokenizer.from_pretrained("Xenova/mobileclip_blt")

# Define the inference function
def compute_probability(image):
    # Process the image
    image = RawImage.read(image)
    image_inputs = processor(image)
    image_embeds = vision_model(image_inputs)
    normalized_image_embeds = image_embeds.normalize().tolist()

    # Compute the probability
    text_inputs = tokenizer(["cats", "dogs", "birds"], padding="max_length", truncation=True)
    text_embeds = model(text_inputs)
    normalized_text_embeds = text_embeds.normalize().tolist()

    probabilities = [F.softmax(torch.tensor([100 * torch.dot(torch.tensor(x), torch.tensor(y)) for y in normalized_text_embeds])).tolist()[0] for x in normalized_image_embeds]

    return {"probability": probabilities[0]}

# Create the Gradio interface
iface = gr.Interface(
    fn=compute_probability,
    inputs="image",
    outputs="text",
    title="CLIP Probability",
    description="Upload an image and get the probability scores!"
)

# Launch the interface
iface.launch()