IbrahimHasani committed on
Commit
897701e
·
1 Parent(s): fd5f2f2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +71 -0
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from io import BytesIO

import gradio as gr
import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel
5
+
6
+
7
# Initialize CLIP model and processor (ViT-B/16 checkpoint).
# NOTE(review): this initialization is duplicated further down in the file;
# the later assignments rebind `processor` and `model`, so the pretrained
# weights are loaded twice at startup — consider removing one copy.
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
11
def image_similarity(image: Image.Image, action_prompt: str) -> bool:
    """Return True if *image* scores higher for the action prompt than "other".

    NOTE(review): this definition is shadowed by a later redefinition of
    ``image_similarity`` in this file, so it never runs at serving time.

    Args:
        image: Input image to classify.
        action_prompt: Action description, e.g. ``"smiling"``.

    Returns:
        True when the positive prompt's CLIP probability exceeds that of the
        generic negative prompt ``"other"``.
    """
    positive_text = f"a picture of someone {action_prompt}"
    # Plain string literal: the original used an f-string with no placeholder.
    negative_text = "other"

    inputs = processor(
        text=[positive_text, negative_text],
        images=image,
        return_tensors="pt",
        padding=True
    )

    outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image  # image-text similarity scores
    probs = logits_per_image.softmax(dim=1)  # softmax over the two prompts

    # Cast to a plain Python bool so the return value matches the annotation
    # (a bare tensor comparison yields a 0-d torch bool tensor, not a bool).
    return bool(probs[0][0] > probs[0][1])
29
+
30
+
31
# Initialize CLIP model and processor (ViT-B/16 checkpoint).
# NOTE(review): duplicate of the identical initialization earlier in the
# file — these assignments rebind `processor`/`model` and load the
# pretrained weights a second time at startup.
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
34
+
35
def image_similarity(image: Image.Image, action_prompt: str):
    """Classify whether *image* depicts someone performing *action_prompt*.

    Contrasts a positive prompt ("a picture of someone <action>") against its
    negation and compares their CLIP softmax probabilities.

    Args:
        image: Input image to classify.
        action_prompt: Action description, e.g. ``"smiling"``.

    Returns:
        A ``(bool, str)`` tuple: the binary classification result and a
        human-readable summary of the two class probabilities.
    """
    positive_text = f"a picture of someone {action_prompt}"
    negative_text = f"not a picture of someone {action_prompt}"

    inputs = processor(
        text=[positive_text, negative_text],
        images=image,
        return_tensors="pt",
        padding=True
    )

    # Inference only: disable autograd so no gradient state is tracked.
    with torch.no_grad():
        outputs = model(**inputs)
    logits_per_image = outputs.logits_per_image  # image-text similarity scores
    probs = logits_per_image.softmax(dim=1)  # softmax over the two prompts

    # Positive wins when its probability exceeds the negative prompt's.
    result = probs[0][0] > probs[0][1]
    return bool(result), f"Probabilities: Positive {probs[0][0]:.4f}, Negative {probs[0][1]:.4f}"
53
+
54
# Gradio UI: one image + one free-text action prompt in, the boolean verdict
# and a probability summary out.
interface = gr.Interface(
    fn=image_similarity,
    inputs=[
        gr.components.Image(type="pil"),
        gr.components.Text(label="Enter action prompt e.g. 'smiling'")
    ],
    outputs=[
        gr.components.Textbox(label="Result"),
        gr.components.Textbox(label="Probabilities")
    ],
    title="Engagify's Image Action Detection",
    # Typos fixed relative to the original copy ("in a image", "Binaray").
    description=(
        "[Author: Ibrahim Hasani] This method uses CLIP-VIT "
        "[Version: BASE-PATCH-16] to determine if an action is being "
        "performed in an image or not (binary classifier). It contrasts an "
        "action against multiple negative labels that are supposedly far "
        "enough in the latent semantic space vs the target label. Do not "
        "use negative labels in the desired activity, rather the action to "
        "be performed."
    ),
    live=False,
    theme=gr.themes.Monochrome(),
)

interface.launch()