sagar007 committed
Commit eefe5b4 (verified) · 1 Parent(s): 2b91c8a

Update app.py

Files changed (1)
app.py +37 -23
app.py CHANGED
@@ -6,19 +6,23 @@ import numpy as np
 from transformers import CLIPProcessor, CLIPModel
 from ultralytics import FastSAM
 import supervision as sv
-from huggingface_hub import hf_hub_download
+import os
 
 # Load CLIP model
-model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+model = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
-# Download and load FastSAM model
-model_path = hf_hub_download("Jiawei-Yang/FastSAM-x", filename="FastSAM-x.pt")
-fast_sam = FastSAM(model_path)
+# Initialize FastSAM model
+FASTSAM_WEIGHTS = "FastSAM-s.pt"
+if not os.path.exists(FASTSAM_WEIGHTS):
+    os.system(f"wget https://huggingface.co/spaces/An-619/FastSAM/resolve/main/weights/{FASTSAM_WEIGHTS}")
+fast_sam = FastSAM(FASTSAM_WEIGHTS)
 
 def process_image_clip(image, text_input):
     if image is None:
         return "Please upload an image first."
+    if not text_input:
+        return "Please enter some text to check in the image."
 
     # Process image for CLIP
     inputs = processor(
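Note on this hunk: the added assignment gives the name model a CLIPProcessor, while the removed line loaded a CLIPModel. If process_image_clip later calls model(**inputs) to score the text against the image, a CLIPModel instance is presumably still needed. A minimal sketch of loading both objects for the same checkpoint:

    from transformers import CLIPModel, CLIPProcessor

    # CLIPModel computes image-text similarity; CLIPProcessor only tokenizes text
    # and preprocesses images into model inputs.
    model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
    processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")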
@@ -43,24 +47,27 @@ def process_image_fastsam(image):
     # Convert PIL image to numpy array
     image_np = np.array(image)
 
-    # Run FastSAM inference
-    results = fast_sam(image_np, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
-
-    # Get detections
-    detections = sv.Detections.from_ultralytics(results[0])
-
-    # Create annotator
-    box_annotator = sv.BoxAnnotator()
-    mask_annotator = sv.MaskAnnotator()
-
-    # Annotate image
-    annotated_image = mask_annotator.annotate(scene=image_np.copy(), detections=detections)
-    annotated_image = box_annotator.annotate(scene=annotated_image, detections=detections)
-
-    return Image.fromarray(annotated_image)
+    try:
+        # Run FastSAM inference
+        results = fast_sam(image_np, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
+
+        # Get detections
+        detections = sv.Detections.from_ultralytics(results[0])
+
+        # Create annotator
+        box_annotator = sv.BoxAnnotator()
+        mask_annotator = sv.MaskAnnotator()
+
+        # Annotate image
+        annotated_image = mask_annotator.annotate(scene=image_np.copy(), detections=detections)
+        annotated_image = box_annotator.annotate(scene=annotated_image, detections=detections)
+
+        return Image.fromarray(annotated_image)
+    except Exception as e:
+        return f"Error processing image: {str(e)}"
 
 # Create Gradio interface
-with gr.Blocks() as demo:
+with gr.Blocks(css="footer {visibility: hidden}") as demo:
     gr.Markdown("""
     # CLIP and FastSAM Demo
     This demo combines two powerful AI models:
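Note on this hunk: process_image_fastsam now returns a PIL image on success but a plain string on failure. If the segmentation tab's output component is a gr.Image (that part of the layout is not visible in this diff), the string will not render as a result; one alternative is to raise a Gradio error so the message is shown in the UI. A sketch under that assumption, reusing the module-level fast_sam and the supervision alias sv defined above:

    import gradio as gr
    import numpy as np
    from PIL import Image

    def process_image_fastsam(image):
        # Sketch, not the committed code: same pipeline, but errors are raised
        # as gr.Error so Gradio displays them instead of receiving a string
        # where an image is expected.
        image_np = np.array(image)
        try:
            results = fast_sam(image_np, device='cpu', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)
            detections = sv.Detections.from_ultralytics(results[0])
            annotated = sv.MaskAnnotator().annotate(scene=image_np.copy(), detections=detections)
            annotated = sv.BoxAnnotator().annotate(scene=annotated, detections=detections)
            return Image.fromarray(annotated)
        except Exception as e:
            raise gr.Error(f"Error processing image: {e}")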
@@ -73,8 +80,11 @@ with gr.Blocks() as demo:
     with gr.Tab("CLIP Zero-Shot Classification"):
         with gr.Row():
             image_input = gr.Image(type="pil", label="Input Image")
-            text_input = gr.Textbox(label="What do you want to check in the image?",
-                                    placeholder="e.g., 'a dog', 'sunset', 'people playing'")
+            text_input = gr.Textbox(
+                label="What do you want to check in the image?",
+                placeholder="e.g., 'a dog', 'sunset', 'people playing'",
+                info="Enter any concept you want to check in the image"
+            )
         output_text = gr.Textbox(label="Result")
         classify_btn = gr.Button("Classify")
         classify_btn.click(fn=process_image_clip, inputs=[image_input, text_input], outputs=output_text)
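The Classify button wires image_input and text_input into process_image_clip, whose body is truncated in this diff at inputs = processor(. For orientation, a typical CLIP zero-shot check continues along these lines; this is a sketch, not the committed implementation, the helper name and candidate prompts are hypothetical, and it assumes model is a CLIPModel as noted after the first hunk:

    def clip_zero_shot(image, text_input):
        # Hypothetical helper mirroring what process_image_clip presumably does
        # after its early-return checks; relies on the module-level model/processor.
        candidates = [f"a photo of {text_input}", "a photo of something else"]  # hypothetical prompt pair
        inputs = processor(text=candidates, images=image, return_tensors="pt", padding=True)
        outputs = model(**inputs)                        # requires a CLIPModel instance
        probs = outputs.logits_per_image.softmax(dim=1)  # shape (1, 2): one probability per prompt
        return f"Match for '{text_input}': {probs[0][0].item():.1%}"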
@@ -90,6 +100,10 @@ with gr.Blocks() as demo:
     ### How to use:
     1. **CLIP Classification**: Upload an image and enter text to check if that concept exists in the image
     2. **FastSAM Segmentation**: Upload an image to get automatic segmentation with bounding boxes and masks
+
+    ### Note:
+    - The models run on CPU, so processing might take a few seconds
+    - For best results, use clear images with good lighting
     """)
 
 demo.launch()
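A closing note on the weights download: the commit drops the hf_hub_download import in favor of shelling out to wget. The same file can be fetched through huggingface_hub, which caches the download and does not depend on wget being available in the container. A sketch, assuming the checkpoint still lives at weights/FastSAM-s.pt in the An-619/FastSAM Space, as the wget URL suggests:

    from huggingface_hub import hf_hub_download
    from ultralytics import FastSAM

    # Download (and cache) the checkpoint from the Space repository instead of
    # shelling out to wget; the path mirrors the URL used in the committed code.
    weights_path = hf_hub_download(
        repo_id="An-619/FastSAM",
        filename="weights/FastSAM-s.pt",
        repo_type="space",
    )
    fast_sam = FastSAM(weights_path)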
 