ktllc committed
Commit 39113b9 · 1 Parent(s): c55abad

Update app.py

Files changed (1)
  1. app.py +18 -17
app.py CHANGED
@@ -1,50 +1,51 @@
+import gradio as gr
 import numpy as np
 import clip
 import torch
-import gradio as gr
-import base64
 from PIL import Image
-from io import BytesIO
+import base64

 # Load the CLIP model
 model, preprocess = clip.load("ViT-B/32")
 device = "cuda" if torch.cuda.is_available() else "cpu"
 model.to(device).eval()

-def find_similarity(image_base64, text_input):
-    # Decode the base64 image string to bytes
-    image_bytes = base64.b64decode(image_base64)
+# Define a function to find similarity
+def find_similarity(base64_image, text_input):
+    # Decode the base64 image to bytes
+    image_bytes = base64.b64decode(base64_image)
+
+    # Convert the bytes to a PIL image
     image = Image.open(BytesIO(image_bytes))

     # Preprocess the image
     image = preprocess(image).unsqueeze(0).to(device)

-    # Prepare input text
+    # Tokenize the text input
     text_tokens = clip.tokenize([text_input]).to(device)

     # Encode image and text features
     with torch.no_grad():
-        image_features = model.encode_image(image).float()
-        text_features = model.encode_text(text_tokens).float()
+        image_features = model.encode_image(image)
+        text_features = model.encode_text(text_tokens)

-    # Normalize features and calculate similarity
-    image_features /= image_features.norm(dim=-1, keepdim=True)
-    text_features /= text_features.norm(dim=-1, keepdim=True)
-    similarity = (text_features @ image_features.T).cpu().numpy()
+    # Calculate cosine similarity
+    similarity = (image_features @ text_features.T).squeeze(0).cpu().numpy()

-    return similarity[0, 0]
+    return similarity

+# Create a Gradio interface
 iface = gr.Interface(
     fn=find_similarity,
     inputs=[
-        gr.inputs.Textbox(lines=3, label="Enter Base64 Image"),
-        gr.inputs.Textbox(lines=3, label="Enter Text"),
+        gr.inputs.Textbox(label="Base64 Image", lines=8),
+        "text"
     ],
     outputs="number",
     live=True,
     interpretation="default",
     title="CLIP Model Image-Text Cosine Similarity",
-    description="Enter a base64-encoded image and text to find their cosine similarity.",
+    description="Upload a base64 image and enter text to find their cosine similarity.",
 )

 iface.launch()
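For reference, below is a minimal self-contained sketch of the updated app.py, not the committed code. It restores the `from io import BytesIO` import that the new version still relies on but no longer imports, re-adds the feature normalization from the old version (without it, `image_features @ text_features.T` is a raw dot product rather than a cosine similarity), and returns a plain Python float so the `"number"` output receives a scalar. It also assumes a Gradio 3+ install, so it uses `gr.Textbox` in place of the legacy `gr.inputs.Textbox` and drops the `interpretation` argument; adjust to match the Gradio version actually installed.

import base64
from io import BytesIO  # still needed: the commit removes this import but keeps the BytesIO(...) call

import clip
import gradio as gr
import torch
from PIL import Image

# Load the CLIP model
model, preprocess = clip.load("ViT-B/32")
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device).eval()


def find_similarity(base64_image, text_input):
    # Decode the base64 string and convert the bytes to an RGB PIL image
    image_bytes = base64.b64decode(base64_image)
    image = Image.open(BytesIO(image_bytes)).convert("RGB")

    # Preprocess the image and tokenize the text
    image = preprocess(image).unsqueeze(0).to(device)
    text_tokens = clip.tokenize([text_input]).to(device)

    # Encode image and text features
    with torch.no_grad():
        image_features = model.encode_image(image).float()
        text_features = model.encode_text(text_tokens).float()

    # Normalize both feature vectors so their dot product is a cosine similarity
    image_features /= image_features.norm(dim=-1, keepdim=True)
    text_features /= text_features.norm(dim=-1, keepdim=True)

    # Return a plain float for Gradio's "number" output
    return (image_features @ text_features.T).item()


# Gradio 3+ style interface (assumption; the commit targets the older gr.inputs.* API)
iface = gr.Interface(
    fn=find_similarity,
    inputs=[
        gr.Textbox(label="Base64 Image", lines=8),
        gr.Textbox(label="Text"),
    ],
    outputs="number",
    title="CLIP Model Image-Text Cosine Similarity",
    description="Paste a base64-encoded image and enter text to find their cosine similarity.",
)

iface.launch()

To try it locally, a base64 string can be produced with, for example, base64.b64encode(open("example.jpg", "rb").read()).decode("utf-8") (the example.jpg path is just a placeholder) and pasted into the first textbox.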