Gosula committed on
Commit
dd2691a
·
1 Parent(s): 480f040

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -0
app.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import CLIPProcessor, CLIPModel
3
+
4
# Checkpoint identifier shared by the CLIP model and its paired processor.
CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"

# Both objects are created once when the module is loaded and are then
# reused as module-level globals.
model = CLIPModel.from_pretrained(CLIP_CHECKPOINT)
processor = CLIPProcessor.from_pretrained(CLIP_CHECKPOINT)
7
+
8
def clip_inference(input_img, input_text):
    """Score an image against comma-separated text prompts with CLIP.

    Args:
        input_img: Image supplied by the Gradio image input.
        input_text: Candidate descriptions separated by commas.

    Returns:
        The softmax probabilities of the image matching each non-empty
        prompt, in prompt order, formatted as a ", "-separated string.

    Raises:
        ValueError: If no image or no non-empty prompt was supplied.
    """
    # Drop blank entries (e.g. from a trailing or doubled comma) so they do
    # not become spurious candidates that absorb probability mass.
    stripped = (entry.strip() for entry in (input_text or "").split(","))
    text_entries = [entry for entry in stripped if entry]

    # Fail early with a readable message instead of an opaque error from
    # inside the processor/model call.
    if input_img is None:
        raise ValueError("Please provide an input image.")
    if not text_entries:
        raise ValueError("Please provide at least one text entry.")

    # Local import keeps this function self-contained; transformers' PyTorch
    # CLIP model already depends on torch, so this adds no new requirement.
    import torch

    # Tokenize the prompts and preprocess the image into PyTorch tensors.
    inputs = processor(
        text=text_entries,
        images=input_img,
        return_tensors="pt",
        padding=True,
    )

    # Pure inference: skip autograd bookkeeping.
    with torch.no_grad():
        outputs = model(**inputs)

    # Normalize the image-to-text logits across the prompts.
    probs = outputs.logits_per_image.softmax(dim=1)

    return ", ".join(str(prob.item()) for prob in probs[0])
24
+
25
# Heading and blurb handed to the Gradio interface.
title = "CLIP OpenAI Model"
description = "Find similarity between images and multiple text entries (separated by commas)."

# One comma-separated prompt set per example image, in image-index order.
text_examples = [
    "a sky with full of stars, painting image",
    "a dog playing in the garden, a dog sleeping in the garden",
    "a small girl dancing, a small girl playing guitar",
    "a small family cooking in the kitchen,family watching the movie",
    "students inside the class,students playing in the ground ",
    "a traffic signal, a lot of cars",
    "a theatre, a football stadium",
    "group of animals, group of birds",
    "yellow sunflowers, red roses",
    "sunset across the lake, sky with full of stars",
]

# Pair each prompt set with examples/images_<index>.jpg (indices start at 0).
examples = [
    [f"examples/images_{index}.jpg", prompts]
    for index, prompts in enumerate(text_examples)
]
39
+
40
# Input widgets, listed in the positional order of clip_inference's
# parameters: image first, then text.
image_input = gr.Image(label="Input image")
text_input = gr.Textbox(placeholder="Input text : Multiple entries separated by commas")

# Single text output that receives the string returned by clip_inference.
scores_output = gr.Textbox(label="similarity scores")

demo = gr.Interface(
    fn=clip_inference,
    inputs=[image_input, text_input],
    outputs=[scores_output],
    title=title,
    description=description,
    examples=examples,
)

# Start the app as soon as the module runs.
demo.launch()
53
+