Spaces:

909ahmed
/

CLIP

Sleeping

File size: 758 Bytes

fd2c1e0
 
 
 
 
 
 
 
c13ea66
105b214
c13ea66
02a2188
c13ea66
 
1e62ffa
02a2188
 
c13ea66
fd2c1e0
c13ea66
105b214
 
479108b
fd2c1e0
105b214
c13ea66

import gradio as gr
import torch
import clip
from PIL import Image

# Pick the compute device: CUDA when torch reports it as available, else CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the ViT-B/32 CLIP checkpoint onto that device. clip.load returns the
# model together with the preprocessing callable used on input images.
model, preprocess = clip.load("ViT-B/32", device=device)

def process_image_and_text(image, text):
    """Score one image against a set of candidate text labels with CLIP.

    Args:
        image: The picture to score, either a PIL image or an array that
            ``PIL.Image.fromarray`` accepts (Gradio's ``'image'`` input
            delivers a NumPy array by default).
        text: The candidate labels. A string is split on commas and each
            piece is stripped of surrounding whitespace (empty pieces are
            dropped). An object exposing ``tolist()`` is converted with it;
            anything else is treated as an iterable of strings.

    Returns:
        The softmax of CLIP's image-to-text logits, taken over the labels
        (last dimension). The image is sent as a batch of one, so the labels
        appear in the order they were supplied. With a single label the
        softmax is trivially 1.0.

    Raises:
        ValueError: If no image is given or no non-empty label remains.
    """
    if image is None:
        raise ValueError("An image is required.")

    # Normalise the label input to a plain list of strings.
    if isinstance(text, str):
        text_list = [label.strip() for label in text.split(",") if label.strip()]
    elif hasattr(text, "tolist"):
        text_list = text.tolist()
    else:
        text_list = list(text)
    if not text_list:
        raise ValueError("At least one text label is required.")

    # CLIP's preprocess pipeline operates on PIL images, not raw arrays.
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    image_batch = preprocess(image).unsqueeze(0).to(device)

    text_tokens = clip.tokenize(text_list).to(device)

    with torch.no_grad():
        # The forward pass encodes both inputs itself, so separate
        # encode_image / encode_text calls would only repeat that work.
        logits_per_image, _ = model(image_batch, text_tokens)
        probs = logits_per_image.softmax(dim=-1)

    return probs


# Input components are passed to `fn` positionally, so their order must match
# the (image, text) parameter order of process_image_and_text.
demo = gr.Interface(
    fn=process_image_and_text,
    inputs=["image", "text"],
    outputs="text",
)
demo.launch()