sagar007 committed
Commit 3d6a9c7 · verified · 1 Parent(s): eefe5b4

Update app.py

Files changed (1)
  1. app.py +53 -23
app.py CHANGED
@@ -9,7 +9,7 @@ import supervision as sv
 import os
 
 # Load CLIP model
-model = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
 # Initialize FastSAM model
@@ -24,30 +24,44 @@ def process_image_clip(image, text_input):
     if not text_input:
         return "Please enter some text to check in the image."
 
-    # Process image for CLIP
-    inputs = processor(
-        images=image,
-        text=[text_input],
-        return_tensors="pt",
-        padding=True
-    )
-
-    # Get model predictions
-    outputs = model(**inputs)
-    logits_per_image = outputs.logits_per_image
-    probs = logits_per_image.softmax(dim=1)
-
-    confidence = float(probs[0][0])
-    return f"Confidence that the image contains '{text_input}': {confidence:.2%}"
+    try:
+        # Convert numpy array to PIL Image if needed
+        if isinstance(image, np.ndarray):
+            image = Image.fromarray(image)
+
+        # Create a list of candidate labels
+        candidate_labels = [text_input, f"not {text_input}"]
+
+        # Process image and text
+        inputs = processor(
+            images=image,
+            text=candidate_labels,
+            return_tensors="pt",
+            padding=True
+        )
+
+        # Get model predictions
+        outputs = model(**{k: v for k, v in inputs.items()})
+        logits_per_image = outputs.logits_per_image
+        probs = logits_per_image.softmax(dim=1)
+
+        # Get confidence for the positive label
+        confidence = float(probs[0][0])
+        return f"Confidence that the image contains '{text_input}': {confidence:.2%}"
+    except Exception as e:
+        return f"Error processing image: {str(e)}"
 
 def process_image_fastsam(image):
     if image is None:
         return None
 
-    # Convert PIL image to numpy array
-    image_np = np.array(image)
-
     try:
+        # Convert PIL image to numpy array if needed
+        if isinstance(image, Image.Image):
+            image_np = np.array(image)
+        else:
+            image_np = image
+
         # Run FastSAM inference
         results = fast_sam(image_np, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
 
@@ -79,7 +93,7 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
 
     with gr.Tab("CLIP Zero-Shot Classification"):
         with gr.Row():
-            image_input = gr.Image(type="pil", label="Input Image")
+            image_input = gr.Image(label="Input Image")
             text_input = gr.Textbox(
                 label="What do you want to check in the image?",
                 placeholder="e.g., 'a dog', 'sunset', 'people playing'",
@@ -88,13 +102,29 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
         output_text = gr.Textbox(label="Result")
         classify_btn = gr.Button("Classify")
         classify_btn.click(fn=process_image_clip, inputs=[image_input, text_input], outputs=output_text)
+
+        gr.Examples(
+            examples=[
+                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/kitchen/kitchen.png", "kitchen"],
+                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/calculator/calculator.jpg", "calculator"],
+            ],
+            inputs=[image_input, text_input],
+        )
 
     with gr.Tab("FastSAM Segmentation"):
         with gr.Row():
-            image_input_sam = gr.Image(type="pil", label="Input Image")
-            image_output = gr.Image(type="pil", label="Segmentation Result")
+            image_input_sam = gr.Image(label="Input Image")
+            image_output = gr.Image(label="Segmentation Result")
         segment_btn = gr.Button("Segment")
         segment_btn.click(fn=process_image_fastsam, inputs=[image_input_sam], outputs=image_output)
+
+        gr.Examples(
+            examples=[
+                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/kitchen/kitchen.png"],
+                ["https://raw.githubusercontent.com/gradio-app/gradio/main/demo/calculator/calculator.jpg"],
+            ],
+            inputs=[image_input_sam],
+        )
 
     gr.Markdown("""
     ### How to use:
@@ -106,4 +136,4 @@ with gr.Blocks(css="footer {visibility: hidden}") as demo:
     - For best results, use clear images with good lighting
     """)
 
-demo.launch()
+demo.launch(share=True)
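
For reference, the zero-shot check that the updated process_image_clip performs reduces to the following minimal, standalone sketch. The checkpoints and the [label, "not label"] pairing mirror the diff; the local image path and the example label are illustrative assumptions, not part of the commit.

from PIL import Image
from transformers import CLIPModel, CLIPProcessor

# Same checkpoints as in the commit
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

image = Image.open("example.jpg")   # assumed local test image
labels = ["a dog", "not a dog"]     # mirrors [text_input, f"not {text_input}"]

inputs = processor(images=image, text=labels, return_tensors="pt", padding=True)
probs = model(**inputs).logits_per_image.softmax(dim=1)
print(f"Confidence that the image contains '{labels[0]}': {float(probs[0][0]):.2%}")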