909ahmed committed
Commit c13ea66 · verified · 1 Parent(s): d0bdc68

Update app.py

Files changed (1)
  1. app.py +14 -7
app.py CHANGED
@@ -2,24 +2,31 @@ import gradio as gr
 import torch
 import clip
 from PIL import Image
+import numpy as np
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model, preprocess = clip.load("ViT-B/32", device=device)
 
-def clip(image, text):
+def process_image_and_text(image, text):
+    # Ensure text is a NumPy array and convert it to a list of strings
+    text_list = text.tolist()
 
-    text = text.tobytes().decode('utf-8').split(',')
+    # Preprocess the image
     image = preprocess(image).unsqueeze(0).to(device)
-    text = clip.tokenize(text).to(device)
+
+    # Tokenize the text
+    text_tokens = clip.tokenize(text_list).to(device)
 
     with torch.no_grad():
+        # Encode image and text
         image_features = model.encode_image(image)
-        text_features = model.encode_text(text)
+        text_features = model.encode_text(text_tokens)
 
-        logits_per_image, logits_per_text = model(image, text)
+        # Compute logits and probabilities
+        logits_per_image, logits_per_text = model(image, text_tokens)
         probs = logits_per_image.softmax(dim=-1).cpu().numpy()
 
     return probs
 
-demo = gr.Interface(fn=clip, inputs=["text", "image"], outputs="text")
-demo.launch()
+demo = gr.Interface(fn=process_image_and_text, inputs=[gr.inputs.Image(type="pil"), gr.inputs.Textbox()], outputs="text")
+demo.launch()
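
Even with this change, the updated app.py has two remaining issues: a Gradio Textbox passes its value as a plain Python str, which has no .tolist() method, and the gr.inputs namespace is deprecated in newer Gradio releases. Below is a minimal working sketch of the same app, assuming the textbox holds comma-separated candidate labels and a Gradio 3+ install; it keeps the process_image_and_text name from the commit, drops the now-unused numpy import, and relies on the model's forward pass instead of the separate encode_image/encode_text calls (whose outputs the committed code never uses).

import gradio as gr
import torch
import clip

device = "cuda" if torch.cuda.is_available() else "cpu"
model, preprocess = clip.load("ViT-B/32", device=device)

def process_image_and_text(image, text):
    # A Gradio Textbox passes a plain Python str, so split the
    # comma-separated labels rather than calling .tolist() on it.
    text_list = [t.strip() for t in text.split(",") if t.strip()]

    # Preprocess the PIL image and tokenize the label strings.
    image = preprocess(image).unsqueeze(0).to(device)
    text_tokens = clip.tokenize(text_list).to(device)

    with torch.no_grad():
        # The forward pass returns image-text similarity logits directly,
        # so separate encode_image/encode_text calls are not needed here.
        logits_per_image, _ = model(image, text_tokens)
        probs = logits_per_image.softmax(dim=-1).cpu().numpy()

    # Pair each label with its probability so the text output is readable.
    return ", ".join(f"{label}: {p:.3f}" for label, p in zip(text_list, probs[0]))

# Gradio 3+ exposes components at the top level (gr.Image, gr.Textbox)
# instead of the deprecated gr.inputs namespace.
demo = gr.Interface(
    fn=process_image_and_text,
    inputs=[gr.Image(type="pil"), gr.Textbox()],
    outputs="text",
)
demo.launch()

Returning a formatted string keeps the original outputs="text" component; feeding a {label: probability} dict to a gr.Label output would be an equally natural fit for this kind of zero-shot classification demo.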