sagar007 committed on
Commit 3701938 · verified
1 Parent(s): 54598df

Update app.py

Files changed (1)
  1. app.py +40 -14
app.py CHANGED
@@ -5,16 +5,21 @@ import cv2
 import numpy as np
 from transformers import CLIPProcessor, CLIPModel
 from ultralytics import FastSAM
-from ultralytics.models.fastsam import FastSAMPrompt
+import supervision as sv
+from huggingface_hub import hf_hub_download
 
 # Load CLIP model
 model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
-# Load FastSAM model
-fast_sam = FastSAM('FastSAM-x.pt')
+# Download and load FastSAM model
+model_path = hf_hub_download("Jiawei-Yang/FastSAM-x", filename="FastSAM-x.pt")
+fast_sam = FastSAM(model_path)
 
 def process_image_clip(image, text_input):
+    if image is None:
+        return "Please upload an image first."
+
     # Process image for CLIP
     inputs = processor(
         images=image,
@@ -32,28 +37,44 @@ def process_image_clip(image, text_input):
     return f"Confidence that the image contains '{text_input}': {confidence:.2%}"
 
 def process_image_fastsam(image):
+    if image is None:
+        return None
+
     # Convert PIL image to numpy array
     image_np = np.array(image)
 
     # Run FastSAM inference
-    everything_results = fast_sam(image_np, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
-    prompt_process = FastSAMPrompt(image_np, everything_results, device='cpu')
-
-    # Get everything mask
-    ann = prompt_process.everything()
-
-    # Convert annotation to image
-    result_image = prompt_process.plot_to_result()
-    return Image.fromarray(result_image)
+    results = fast_sam(image_np, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
+
+    # Get detections
+    detections = sv.Detections.from_ultralytics(results[0])
+
+    # Create annotator
+    box_annotator = sv.BoxAnnotator()
+    mask_annotator = sv.MaskAnnotator()
+
+    # Annotate image
+    annotated_image = mask_annotator.annotate(scene=image_np.copy(), detections=detections)
+    annotated_image = box_annotator.annotate(scene=annotated_image, detections=detections)
+
+    return Image.fromarray(annotated_image)
 
 # Create Gradio interface
 with gr.Blocks() as demo:
-    gr.Markdown("# CLIP and FastSAM Demo")
+    gr.Markdown("""
+    # CLIP and FastSAM Demo
+    This demo combines two powerful AI models:
+    - **CLIP**: For zero-shot image classification
+    - **FastSAM**: For automatic image segmentation
+
+    Try uploading an image and use either of the tabs below!
+    """)
 
     with gr.Tab("CLIP Zero-Shot Classification"):
         with gr.Row():
             image_input = gr.Image(type="pil", label="Input Image")
-            text_input = gr.Textbox(label="What do you want to check in the image?", placeholder="Type here...")
+            text_input = gr.Textbox(label="What do you want to check in the image?",
+                                    placeholder="e.g., 'a dog', 'sunset', 'people playing'")
         output_text = gr.Textbox(label="Result")
         classify_btn = gr.Button("Classify")
         classify_btn.click(fn=process_image_clip, inputs=[image_input, text_input], outputs=output_text)
@@ -64,6 +85,11 @@ with gr.Blocks() as demo:
         image_output = gr.Image(type="pil", label="Segmentation Result")
         segment_btn = gr.Button("Segment")
         segment_btn.click(fn=process_image_fastsam, inputs=[image_input_sam], outputs=image_output)
+
+    gr.Markdown("""
+    ### How to use:
+    1. **CLIP Classification**: Upload an image and enter text to check if that concept exists in the image
+    2. **FastSAM Segmentation**: Upload an image to get automatic segmentation with bounding boxes and masks
+    """)
 
-if __name__ == "__main__":
-    demo.launch()
+demo.launch()
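
For anyone who wants to sanity-check the new segmentation path outside Gradio, a minimal sketch that mirrors the calls introduced in this commit (same checkpoint, same inference settings, supervision-based annotation) is shown below. The input file "test.jpg" and output file "segmented.jpg" are placeholders, not part of the app.

import numpy as np
from PIL import Image
import supervision as sv
from ultralytics import FastSAM
from huggingface_hub import hf_hub_download

# Fetch the FastSAM-x checkpoint from the Hub (cached after the first call), as app.py now does
model_path = hf_hub_download("Jiawei-Yang/FastSAM-x", filename="FastSAM-x.pt")
fast_sam = FastSAM(model_path)

# "test.jpg" is a placeholder for any local RGB image
image_np = np.array(Image.open("test.jpg").convert("RGB"))

# Same inference settings as process_image_fastsam in app.py
results = fast_sam(image_np, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
detections = sv.Detections.from_ultralytics(results[0])

# Overlay masks first, then boxes, exactly as the updated function does
annotated = sv.MaskAnnotator().annotate(scene=image_np.copy(), detections=detections)
annotated = sv.BoxAnnotator().annotate(scene=annotated, detections=detections)
Image.fromarray(annotated).save("segmented.jpg")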