IbrahimHasani committed
Commit 22ae21c · 1 Parent(s): f201a9c

Update app.py

Files changed (1)
  1. app.py +7 -10
app.py CHANGED
@@ -1,19 +1,15 @@
 from PIL import Image
 from transformers import CLIPProcessor, CLIPModel
-from io import BytesIO
 import gradio as gr
 
-
 # Initialize CLIP model and processor
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
 model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")
 
-def image_similarity(image: Image.Image, action_prompt: str):
-    positive_text = f"a person {action_prompt}"
-    negative_text = f"a person not {action_prompt}"
-
+def image_similarity(image: Image.Image, positive_prompt: str, negative_prompt: str):
+
     inputs = processor(
-        text=[positive_text, negative_text],
+        text=[positive_prompt, negative_prompt],
         images=image,
         return_tensors="pt",
         padding=True
@@ -31,17 +27,18 @@ interface = gr.Interface(
     fn=image_similarity,
     inputs=[
         gr.components.Image(type="pil"),
-        gr.components.Text(label="Enter action prompt e.g. 'smiling'")
+        gr.components.Text(label="Enter positive prompt e.g. 'a smiling face'"),
+        gr.components.Text(label="Enter negative prompt e.g. 'a sad face'")
     ],
     outputs=[
         gr.components.Textbox(label="Result"),
         gr.components.Textbox(label="Probabilities")
     ],
     title="Engagify's Image Action Detection",
-    description="[Author: Ibrahim Hasani] This Method uses CLIP-VIT [Version: BASE-PATCH-16] to determine if an action is being performed in a image or not. (Binaray Classifier). It contrasts an Action against multiple negative labels that are supposedly far enough in the latent semantic space vs the target label. Do not use negative labels in the desired activity, rather the action to be performed.",
+    description="[Author: Ibrahim Hasani] This Method uses CLIP-VIT [Version: BASE-PATCH-16] to determine if an action is being performed in a image or not. (Binaray Classifier). It contrasts an Action against a negative label. Ensure the prompts accurately describe the desired detection.",
     live=False,
     theme=gr.themes.Monochrome(),
 
 )
 
-interface.launch()
+interface.launch()
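For reference, here is a minimal sketch of how the updated image_similarity function is typically completed. The diff only shows the new signature and the processor call, so the torch import, the softmax over logits_per_image, and the two returned strings (mapped to the "Result" and "Probabilities" textboxes) are assumptions based on standard CLIP usage, not the verbatim contents of app.py.

import torch
from PIL import Image
from transformers import CLIPProcessor, CLIPModel

processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch16")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch16")

def image_similarity(image: Image.Image, positive_prompt: str, negative_prompt: str):
    # Encode the image against both prompts in one batch.
    inputs = processor(
        text=[positive_prompt, negative_prompt],
        images=image,
        return_tensors="pt",
        padding=True
    )
    with torch.no_grad():
        outputs = model(**inputs)
    # logits_per_image has shape (1, 2): the image's similarity to each prompt.
    probs = outputs.logits_per_image.softmax(dim=1).squeeze()
    # Hypothetical output mapping for the two Gradio textboxes.
    result = "Positive prompt matches" if probs[0] > probs[1] else "Negative prompt matches"
    return result, f"positive: {probs[0]:.3f}, negative: {probs[1]:.3f}"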