YouLiXiya committed on
Commit
5e8bab7
1 Parent(s): a1c9b3e

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +164 -0
app.py ADDED
@@ -0,0 +1,164 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import cv2
3
+ import gradio as gr
4
+ from PIL import Image
5
+ import numpy as np
6
+ from sam_extension.utils import add_points_tag, add_boxes_tag, mask2greyimg
7
+ from sam_extension.pipeline import SAMEncoderPipeline, SAMDecoderPipeline, GroundingDinoPipeline
8
# ---------------------------------------------------------------------------
# Module-level UI state shared by the Gradio callbacks in this file.
# NOTE(review): these are process-wide globals, so every browser session
# talking to the same server process shares them.
# ---------------------------------------------------------------------------

# Clicked prompt points as [x, y] pairs, and one label per point
# (1 when the 'pos_point' checkbox was ticked, 0 otherwise).
point_coords = []
point_labels = []

# Completed boxes (four numbers each) and the corner coordinates of the box
# currently being drawn (filled two numbers per click).
boxes = []
boxes_point = []

# Declared for text prompts; not written by any callback visible in this file.
texts = []

# SAM pipelines; stay None until the 'load SAM' button handler assigns them.
sam_encoder_pipeline = None
sam_decoder_pipeline = None

# Outputs of the most recent mask generation, indexed by the
# 'show_result' / 'show_mask_result' handlers.
result_list = []
result_index_list = []
mask_result_list = []
mask_result_index_list = []
19
def resize(image, des_max=512):
    """Resize *image* so that its longer side equals *des_max*.

    The aspect ratio is preserved (up to integer truncation of the shorter
    side).

    Args:
        image: Array-like image exposing ``.shape`` whose first two entries
            are read as ``(height, width)``.
        des_max: Target length, in pixels, of the longer side.

    Returns:
        Whatever ``cv2.resize`` returns for the computed ``(width, height)``.
    """
    h, w = image.shape[:2]
    if h >= w:
        new_h = des_max
        # Clamp to 1: for very elongated images the truncated short side
        # would otherwise be 0, which is not a valid target size.
        new_w = max(1, int(des_max * w / h))
    else:
        new_w = des_max
        new_h = max(1, int(des_max * h / w))
    # cv2.resize takes the target size as (width, height).
    return cv2.resize(image, (new_w, new_h))
28
def show_prompt(img, prompt_mode, pos_point, evt: gr.SelectData):  # SelectData is a subclass of EventData
    """Record a click on the input image as a point or box-corner prompt.

    Args:
        img: The input image the user clicked on.
        prompt_mode: ``'point'`` or ``'box'``; any other value (e.g.
            ``'text'``) records nothing.
        pos_point: Truthy to label a clicked point 1, falsy to label it 0.
        evt: Gradio select event; ``evt.index[0]`` and ``evt.index[1]`` are
            stored as the clicked coordinates.

    Returns:
        Tuple ``(result_img, point_coords, point_labels, boxes_point, boxes)``
        where ``result_img`` is the annotated image, or ``img`` unchanged when
        nothing new was drawn.
    """
    global point_coords, point_labels, boxes_point, boxes

    # Start from the unannotated image so every path below returns a defined
    # value (a half-finished box, or a mode that records no click at all).
    result_img = img

    if prompt_mode == 'point':
        point_coords.append([evt.index[0], evt.index[1]])
        point_labels.append(1 if pos_point else 0)
        result_img = add_points_tag(img, np.array(point_labels), np.array(point_coords))
    elif prompt_mode == 'box':
        # Each click contributes one corner (two numbers); two clicks make a box.
        boxes_point.append(evt.index[0])
        boxes_point.append(evt.index[1])
        if len(boxes_point) == 4:
            boxes.append(boxes_point)
            # Rebind rather than clear(): the list just appended to `boxes`
            # must keep its four values.
            boxes_point = []
            result_img = add_boxes_tag(img, np.array(boxes))

    return result_img, point_coords, point_labels, boxes_point, boxes
44
+
45
def reset_points(img):
    """Discard all recorded point prompts.

    Args:
        img: The current input image; returned unchanged so the caller can
            display it without point markers.

    Returns:
        Tuple ``(img, point_coords, point_labels)`` where both lists are the
        new, empty module-level lists.
    """
    global point_coords, point_labels
    point_coords = []
    point_labels = []
    return img, point_coords, point_labels
50
+
51
+
52
def reset_boxes(img):
    """Discard all recorded box prompts, including a half-drawn box.

    Args:
        img: The current input image; returned unchanged so the caller can
            display it without box markers.

    Returns:
        Tuple ``(img, boxes_point, boxes)`` where both lists are the new,
        empty module-level lists.
    """
    global boxes_point, boxes
    boxes_point = []
    boxes = []
    return img, boxes_point, boxes
57
+
58
def load_sam(sam_ckpt_path, sam_version):
    """Load the SAM encoder and decoder pipelines into the module globals.

    Args:
        sam_ckpt_path: Checkpoint path forwarded as ``ckpt_path``.
        sam_version: Model variant name forwarded as ``sam_version``.

    Returns:
        The status string ``'sam loaded!'``.
    """
    global sam_encoder_pipeline, sam_decoder_pipeline
    # Both pipelines are built from the same checkpoint and pinned to the CPU.
    sam_encoder_pipeline = SAMEncoderPipeline.from_pretrained(
        ckpt_path=sam_ckpt_path, sam_version=sam_version, device='cpu')
    sam_decoder_pipeline = SAMDecoderPipeline.from_pretrained(
        ckpt_path=sam_ckpt_path, sam_version=sam_version, device='cpu')
    return 'sam loaded!'
63
+
64
+
65
def generate_mask(img, prompt_mode, text_prompt):
    """Run SAM on *img* using the prompts collected for *prompt_mode*.

    The visualisations produced by the decoder pipeline are stored in the
    module-level ``result_list`` and ``mask_result_list``.

    Args:
        img: Input image as an array accepted by ``PIL.Image.fromarray``.
        prompt_mode: ``'point'`` uses the recorded points and labels,
            ``'box'`` uses the recorded boxes, and anything else treats
            *text_prompt* as a comma-separated list of phrases for
            GroundingDINO.
        text_prompt: Comma-separated text prompts; only read in text mode.

    Returns:
        Three status strings: an overall message, the number of result
        images, and the number of masks.
    """
    global result_list, mask_result_list

    # Fail with a readable message instead of an AttributeError on None when
    # the user has not pressed 'load SAM' or has not provided an image.
    if sam_encoder_pipeline is None or sam_decoder_pipeline is None:
        return 'please load SAM first!', '', ''
    if img is None:
        return 'please upload an image first!', '', ''

    image = Image.fromarray(img)
    img_size = sam_decoder_pipeline.img_size
    # The encoder is fed a square copy resized to the pipeline's img_size.
    des_img = image.resize((img_size, img_size))
    sam_encoder_output = sam_encoder_pipeline(des_img)

    # Exactly one prompt family is populated per call; the rest stay None.
    point_coords_ = None
    point_labels_ = None
    boxes_ = None
    texts_ = None
    grounding_dino_pipeline = None
    if prompt_mode == 'point':
        point_coords_ = np.array(point_coords)
        point_labels_ = np.array(point_labels)
    elif prompt_mode == 'box':
        boxes_ = np.array(boxes)
    else:
        texts_ = text_prompt.split(',')
        # NOTE(review): the GroundingDINO pipeline is rebuilt on every
        # text-mode call; consider caching it if loading proves slow.
        grounding_dino_pipeline = GroundingDinoPipeline.from_pretrained(
            'GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py',
            'weights/groundingdino/groundingdino_swint_ogc.pth',
            device='cpu')

    result_list, mask_result_list, masks_list = sam_decoder_pipeline.visualize_results(
        image,
        des_img,
        sam_encoder_output,
        point_coords=point_coords_,
        point_labels=point_labels_,
        boxes=boxes_,
        texts=texts_,
        grounding_dino_pipeline=grounding_dino_pipeline,
        multimask_output=True,
        # Keyword spelling ('promts') is kept exactly as in the original call.
        visualize_promts=True,
        pil=False)
    return 'mask generated!', f'result_num : {len(result_list)}', f'mask_result_num : {len(masks_list)}'
108
+
109
+
110
def show_result(result_index):
    """Return the stored result image at *result_index*.

    Args:
        result_index: Index into the module-level ``result_list``; converted
            with ``int()``, so float values are accepted.

    Returns:
        The selected entry, or ``None`` when the index is missing, not
        convertible to an integer, or out of range.
    """
    try:
        return result_list[int(result_index)]
    except (IndexError, TypeError, ValueError):
        # Empty/cleared index field or an index past the end: show nothing
        # instead of raising inside the UI callback.
        return None
112
+
113
+
114
def show_mask_result(mask_result_index):
    """Return the stored mask visualisation at *mask_result_index*.

    Args:
        mask_result_index: Index into the module-level ``mask_result_list``;
            converted with ``int()``, so float values are accepted.

    Returns:
        The selected entry, or ``None`` when the index is missing, not
        convertible to an integer, or out of range.
    """
    try:
        return mask_result_list[int(mask_result_index)]
    except (IndexError, TypeError, ValueError):
        # Empty/cleared index field or an index past the end: show nothing
        # instead of raising inside the UI callback.
        return None
116
+
117
+
118
# Gradio UI: one Blocks app laid out as a stack of rows, with each button's
# handler registered directly after the button is created.
# NOTE(review): the nesting was rebuilt from statement order; the exact row in
# which each .click() was registered originally could not be confirmed.
with gr.Blocks() as demo:
    # Input image (clickable) next to the output/preview image.
    with gr.Row():
        img = gr.Image(None, width=400, height=400, label='input_image', type='numpy')
        result_img = gr.Image(None, width=400, height=400, label='output_image', type='numpy')

    # Prompt configuration: label of the next point and the active prompt mode.
    with gr.Row():
        pos_point = gr.Checkbox(value=True, label='pos_point')
        prompt_mode = gr.Dropdown(choices=['point', 'box', 'text'], value='point', label='prompt_mode')

    # Recorded point prompts and their reset button.
    with gr.Row():
        point_coords_text = gr.Textbox(value=str(point_coords), interactive=True, label='point_coords')
        point_labels_text = gr.Textbox(value=str(point_labels), interactive=True, label='point_labels')
        reset_points_bu = gr.Button(value='reset_points')
        reset_points_bu.click(fn=reset_points, inputs=[img], outputs=[result_img, point_coords_text, point_labels_text])

    # Recorded box prompts and their reset button.
    with gr.Row():
        boxes_point_text = gr.Textbox(value=str(boxes_point), interactive=True, label='boxes_point')
        boxes_text = gr.Textbox(value=str(boxes), interactive=True, label='boxes')
        reset_boxes_bu = gr.Button(value='reset_boxes')
        reset_boxes_bu.click(fn=reset_boxes, inputs=[img], outputs=[result_img, boxes_point_text, boxes_text])

    # Free-text prompt used when prompt_mode is 'text'.
    with gr.Row():
        text_prompt = gr.Textbox(value='', interactive=True, label='text_prompt')

    # SAM checkpoint selection and loading.
    with gr.Row():
        sam_ckpt_path = gr.Dropdown(choices=['weights/sam/mobile_sam.pt'],
                                    value='weights/sam/mobile_sam.pt',
                                    label='SAM ckpt_path')
        sam_version = gr.Dropdown(choices=['mobile_sam'],
                                  value='mobile_sam',
                                  label='SAM version')
        load_sam_bu = gr.Button(value='load SAM')
        sam_load_text = gr.Textbox(value='', interactive=True, label='sam_load')
        load_sam_bu.click(fn=load_sam, inputs=[sam_ckpt_path, sam_version], outputs=sam_load_text)

    # Browse the result images of the last generation by index.
    with gr.Row():
        result_num_text = gr.Textbox(value='', interactive=True, label='result_num')
        result_index = gr.Number(value=0, label='result_index')
        show_result_bu = gr.Button(value='show_result')
        show_result_bu.click(fn=show_result, inputs=[result_index], outputs=[result_img])

    # Browse the mask visualisations of the last generation by index.
    with gr.Row():
        mask_result_num_text = gr.Textbox(value='', interactive=True, label='mask_result_num')
        mask_result_index = gr.Number(value=0, label='mask_result_index')
        show_mask_result_bu = gr.Button(value='show_mask_result')
        show_mask_result_bu.click(fn=show_mask_result, inputs=[mask_result_index], outputs=[result_img])

    # Trigger mask generation and report its status strings.
    with gr.Row():
        generate_masks_bu = gr.Button(value='SAM generate masks')
        sam_text = gr.Textbox(value='', interactive=True, label='SAM')
        generate_masks_bu.click(fn=generate_mask, inputs=[img, prompt_mode, text_prompt], outputs=[sam_text, result_num_text, mask_result_num_text])

    # Clicking the input image records a prompt and refreshes the preview and
    # the four prompt text boxes.
    img.select(show_prompt, [img, prompt_mode, pos_point], [result_img, point_coords_text, point_labels_text, boxes_point_text, boxes_text])
162
if __name__ == '__main__':
    # Install the two vendored packages in editable mode before starting the
    # UI. The command is a single '&&' chain, so a failing step skips the
    # rest; os.system's exit status is ignored and the app is launched
    # regardless (best effort).
    # NOTE(review): whitespace in this command was restored ('cd /GroundingDINO',
    # 'cd ..', 'pip install -e .'); the leading '/' is kept as found — confirm
    # that the checkout really lives at the filesystem root.
    os.system('cd /GroundingDINO && pip install -e . && cd .. && cd segment_anything && pip install -e .')
    demo.launch()