IbrahimHasani committed
Commit dab8972 · 1 Parent(s): 93fe568

Update app.py

Files changed (1):
  1. app.py +17 -14
app.py CHANGED
@@ -6,36 +6,39 @@ import gradio as gr
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
 
-def image_similarity(image: Image.Image, positive_prompt: str, negative_prompt: str):
+def image_similarity(image: Image.Image, positive_prompt: str, negative_prompts: list):
 
+    prompts = [positive_prompt] + negative_prompts
+
     inputs = processor(
-        text=[positive_prompt, negative_prompt],
-        images=image,
-        return_tensors="pt",
+        text=prompts,
+        images=image,
+        return_tensors="pt",
         padding=True
     )
 
     outputs = model(**inputs)
-    logits_per_image = outputs.logits_per_image # image-text similarity score
-    probs = logits_per_image.softmax(dim=1) # take the softmax to get the label probabilities
-
-    # Determine if positive prompt has a higher probability than the negative prompt
-    result = probs[0][0] > probs[0][1]
-    return bool(result), f"Probabilities: Positive {probs[0][0]:.4f}, Negative {probs[0][1]:.4f}"
+    logits_per_image = outputs.logits_per_image
+    probs = logits_per_image.softmax(dim=1)
+
+    # Determine if positive prompt has a higher probability than any of the negative prompts
+    is_positive_highest = probs[0][0] > max(probs[0][1:])
+
+    return bool(is_positive_highest), f"Probability for Positive Prompt: {probs[0][0]:.4f}"
 
 interface = gr.Interface(
     fn=image_similarity,
     inputs=[
         gr.components.Image(type="pil"),
-        gr.components.Text(label="Enter positive prompt e.g. 'a smiling face'"),
-        gr.components.Text(label="Enter negative prompt e.g. 'a sad face'")
+        gr.components.Text(label="Enter positive prompt e.g. 'a person drinking a beverage'"),
+        gr.components.Textbox(label="Enter negative prompts, separated by semicolon e.g. 'an empty scene; person without beverage'", placeholder="negative prompt 1; negative prompt 2; ..."),
     ],
     outputs=[
         gr.components.Textbox(label="Result"),
-        gr.components.Textbox(label="Probabilities")
+        gr.components.Textbox(label="Probability for Positive Prompt")
     ],
     title="Engagify's Image Action Detection",
-    description="[Author: Ibrahim Hasani] This Method uses CLIP-VIT [Version: BASE-PATCH-16] to determine if an action is being performed in a image or not. (Binaray Classifier). It contrasts an Action against a negative label. Ensure the prompts accurately describe the desired detection.",
+    description="[Author: Ibrahim Hasani] This Method uses CLIP-VIT [Version: BASE-PATCH-16] to determine if an action is being performed in an image or not. (Binary Classifier). It contrasts an Action against multiple negative labels. Ensure the prompts accurately describe the desired detection.",
     live=False,
     theme=gr.themes.Monochrome(),
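
Note on the new signature: the handler annotates negative_prompts as a list, but a Gradio Textbox passes its value as a single string, so "[positive_prompt] + negative_prompts" would raise a TypeError unless the semicolon-separated string is split first. Below is a minimal standalone sketch of the intended flow with that split step added; the split/strip pre-processing and the example file path are assumptions for illustration, not part of this commit. (Also worth noting: the code loads the patch32 checkpoint, while the UI description string says BASE-PATCH-16.)

# Minimal standalone sketch of the updated handler (assumptions flagged inline;
# not the exact contents of commit dab8972).
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

def image_similarity(image: Image.Image, positive_prompt: str, negative_prompts_text: str):
    # Assumption: split the semicolon-separated Textbox value into a list here,
    # since Gradio delivers the Textbox contents as one string, not a list.
    negative_prompts = [p.strip() for p in negative_prompts_text.split(";") if p.strip()]
    prompts = [positive_prompt] + negative_prompts

    inputs = processor(text=prompts, images=image, return_tensors="pt", padding=True)
    with torch.no_grad():  # inference only; no gradients needed
        outputs = model(**inputs)

    # logits_per_image has shape (1, len(prompts)); softmax turns the row
    # into a probability distribution over all prompts.
    probs = outputs.logits_per_image.softmax(dim=1)

    # The positive prompt "wins" only if it beats every negative prompt.
    is_positive_highest = probs[0][0] > probs[0][1:].max()
    return bool(is_positive_highest), f"Probability for Positive Prompt: {probs[0][0]:.4f}"

# Example call (hypothetical image path):
# image_similarity(Image.open("photo.jpg"),
#                  "a person drinking a beverage",
#                  "an empty scene; person without beverage")

Comparing the positive probability against the maximum over all negatives is what lets this commit generalize the old two-label check to any number of negative prompts.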