topdu committed
Commit 695a4a4
1 Parent(s): ac9bf47

update app
app.py CHANGED
@@ -1,3 +1,6 @@
+# -*- encoding: utf-8 -*-
+# @Author: OpenOCR
+# @Contact: [email protected]
 import os
 import gradio as gr  # gradio==4.20.0
 
@@ -20,10 +23,20 @@ font_path = './simfang.ttf'
 check_and_download_font(font_path)
 
 
-def main(input_image):
+def main(input_image,
+         rec_drop_score=0.01,
+         mask_thresh=0.3,
+         box_thresh=0.6,
+         unclip_ratio=1.5,
+         det_score_mode='slow'):
     img = input_image[:, :, ::-1]
     starttime = time.time()
-    results, time_dict, mask = text_sys(img_numpy=img, return_mask=True)
+    results, time_dict, mask = text_sys(img_numpy=img,
+                                        return_mask=True,
+                                        thresh=mask_thresh,
+                                        box_thresh=box_thresh,
+                                        unclip_ratio=unclip_ratio,
+                                        score_mode=det_score_mode)
     elapse = time.time() - starttime
     save_pred = json.dumps(results[0], ensure_ascii=False)
     image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
@@ -35,10 +48,10 @@ def main(input_image):
         boxes,
         txts,
         scores,
-        drop_score=drop_score,
+        drop_score=rec_drop_score,
         font_path=font_path,
     )
-    mask = mask[0, 0, :, :] > 0.3
+    mask = mask[0, 0, :, :] > mask_thresh
     return save_pred, elapse, draw_img, mask.astype('uint8') * 255
 
 
@@ -75,15 +88,6 @@ def find_file_in_current_dir_and_subdirs(file_name):
     return relative_path
 
 
-def predict1(input_image, Model_type, OCR_type):
-    if OCR_type == 'E2E':
-        return 11111, 'E2E', input_image
-    elif OCR_type == 'STR':
-        return 11111, 'STR', input_image
-    else:
-        return 11111, 'STD', input_image
-
-
 e2e_img_example = list_image_paths('./OCR_e2e_img')
 
 if __name__ == '__main__':
@@ -103,6 +107,45 @@ if __name__ == '__main__':
                 label='Examples')
             downstream = gr.Button('Run')
 
+            with gr.Row():
+                with gr.Column():
+                    rec_drop_score_slider = gr.Slider(
+                        0.0,
+                        1.0,
+                        value=0.01,
+                        step=0.01,
+                        label="Recognition Drop Score",
+                        info="Recognition confidence threshold; defaults to 0.01. Recognition results, and their text boxes, scoring below this threshold are discarded.")
+                    mask_thresh_slider = gr.Slider(
+                        0.0,
+                        1.0,
+                        value=0.3,
+                        step=0.01,
+                        label="Mask Threshold",
+                        info="Threshold for binarizing the detection mask; defaults to 0.3. Turn it down if text is truncated.")
+                with gr.Column():
+                    box_thresh_slider = gr.Slider(
+                        0.0,
+                        1.0,
+                        value=0.6,
+                        step=0.01,
+                        label="Box Threshold",
+                        info="Text box confidence threshold; defaults to 0.6. Turn it down if text is being missed.")
+                    unclip_ratio_slider = gr.Slider(
+                        1.5,
+                        2.0,
+                        value=1.5,
+                        step=0.05,
+                        label="Unclip Ratio",
+                        info="Expansion factor for the detected text boxes; defaults to 1.5. The larger the value, the larger the box.")
+
+            det_score_mode_dropdown = gr.Dropdown(
+                ["slow", "fast"],
+                value="slow",
+                label="Det Score Mode",
+                info="Confidence calculation mode for text boxes; defaults to slow. Slow mode is slower but more accurate; fast mode is faster but less accurate."
+            )
+
         with gr.Column(scale=1):
             img_mask = gr.Image(label='mask',
                                 interactive=False,
@@ -116,7 +159,9 @@ if __name__ == '__main__':
 
         downstream.click(fn=main,
                          inputs=[
-                             input_image,
+                             input_image, rec_drop_score_slider,
+                             mask_thresh_slider, box_thresh_slider,
+                             unclip_ratio_slider, det_score_mode_dropdown
                          ],
                          outputs=[
                              output,
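
For reference, a minimal self-contained sketch of the wiring pattern this diff adds: every gr.Slider/gr.Dropdown listed in inputs is passed positionally to fn on each click, which is how the new detection knobs reach main(). It assumes gradio==4.20.0 as pinned above; the component and function names here are illustrative, not part of the commit.

# Sketch only: replicates the slider -> fn wiring used in app.py above.
import gradio as gr  # gradio==4.20.0

def run(image, rec_drop_score, mask_thresh, box_thresh, unclip_ratio, score_mode):
    # app.py forwards these values to text_sys(...) on every request.
    return (f'drop={rec_drop_score}, mask={mask_thresh}, box={box_thresh}, '
            f'unclip={unclip_ratio}, mode={score_mode}')

with gr.Blocks() as demo:
    image = gr.Image(label='input image')
    drop = gr.Slider(0.0, 1.0, value=0.01, step=0.01, label='Recognition Drop Score')
    thresh = gr.Slider(0.0, 1.0, value=0.3, step=0.01, label='Mask Threshold')
    box = gr.Slider(0.0, 1.0, value=0.6, step=0.01, label='Box Threshold')
    unclip = gr.Slider(1.5, 2.0, value=1.5, step=0.05, label='Unclip Ratio')
    mode = gr.Dropdown(['slow', 'fast'], value='slow', label='Det Score Mode')
    out = gr.Textbox(label='parameters received by run()')
    # Components listed in `inputs` are passed positionally to fn on click.
    gr.Button('Run').click(fn=run,
                           inputs=[image, drop, thresh, box, unclip, mode],
                           outputs=[out])

if __name__ == '__main__':
    demo.launch()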
configs/det/dbnet/repvit_db.yml CHANGED
@@ -53,7 +53,7 @@ Architecture:
 PostProcess:
   name: DBPostProcess
   thresh: 0.3
-  box_thresh: 0.4
+  box_thresh: 0.6
   max_candidates: 1000
   unclip_ratio: 1.5
   score_mode: 'slow'
opendet/postprocess/db_postprocess.py CHANGED
@@ -208,7 +208,12 @@ class DBPostProcess(object):
         cv2.fillPoly(mask, contour.reshape(1, -1, 2).astype('int32'), 1)
         return cv2.mean(bitmap[ymin:ymax + 1, xmin:xmax + 1], mask)[0]
 
-    def __call__(self, outs_dict, shape_list):
+    def __call__(self, outs_dict, shape_list, **kwargs):
+        self.thresh = kwargs.get('thresh', self.thresh)
+        self.box_thresh = kwargs.get('box_thresh', self.box_thresh)
+        self.unclip_ratio = kwargs.get('unclip_ratio', self.unclip_ratio)
+        self.box_type = kwargs.get('box_type', self.box_type)
+        self.score_mode = kwargs.get('score_mode', self.score_mode)
         pred = outs_dict['maps']
         if isinstance(pred, torch.Tensor):
             pred = pred.detach().cpu().numpy()
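
One design consequence worth noting: the overrides are written back onto self, so a keyword passed on one call persists as the new default for later calls on the same DBPostProcess instance. A minimal runnable sketch of the pattern (illustrative class, not the repo's full implementation):

# Sketch of the kwargs-override pattern introduced in __call__ above.
class PostProcessSketch:

    def __init__(self, thresh=0.3, box_thresh=0.6, unclip_ratio=1.5,
                 box_type='quad', score_mode='slow'):
        self.thresh = thresh
        self.box_thresh = box_thresh
        self.unclip_ratio = unclip_ratio
        self.box_type = box_type
        self.score_mode = score_mode

    def __call__(self, outs_dict, shape_list, **kwargs):
        # Each keyword falls back to the current instance value...
        self.thresh = kwargs.get('thresh', self.thresh)
        self.box_thresh = kwargs.get('box_thresh', self.box_thresh)
        # ...and, because it is assigned to self, sticks for the next call.
        return {'thresh': self.thresh, 'box_thresh': self.box_thresh}

pp = PostProcessSketch()
print(pp({}, None, box_thresh=0.8))  # {'thresh': 0.3, 'box_thresh': 0.8}
print(pp({}, None))                  # box_thresh is still 0.8, not 0.6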
tools/infer_det.py CHANGED
@@ -353,7 +353,8 @@ class OpenDetector(object):
                  img_path=None,
                  img_numpy_list=None,
                  img_numpy=None,
-                 return_mask=False):
+                 return_mask=False,
+                 **kwargs):
         """
         Process the input image and return the results.
 
@@ -400,7 +401,7 @@ class OpenDetector(object):
             t_start = time.time()
             preds = self.model(images)
             t_cost = time.time() - t_start
-            post_result = self.post_process_class(preds, shape_list)
+            post_result = self.post_process_class(preds, shape_list, **kwargs)
 
             info = {'boxes': post_result[0]['points'], 'elapse': t_cost}
             if return_mask:
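
Hedged usage sketch: with **kwargs threaded through, post-processing thresholds can now be overridden per call from the detector's entry point. The import path matches this repo's layout, but the no-argument construction and the sample image path are assumptions, not verified here.

# Assumes the repo root is on sys.path; OpenDetector construction details
# are assumptions for illustration.
import cv2
from tools.infer_det import OpenDetector

detector = OpenDetector()          # hypothetical default construction
img = cv2.imread('sample.jpg')     # hypothetical input image (BGR)
info = detector(img_numpy=img,
                thresh=0.3,        # forwarded to DBPostProcess via **kwargs
                box_thresh=0.6,
                unclip_ratio=1.5,
                score_mode='slow')[0]
print(info['boxes'], info['elapse'])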
tools/infer_e2e.py CHANGED
@@ -182,14 +182,15 @@ class OpenOCR(object):
                            ori_img,
                            crop_infer=False,
                            rec_batch_num=6,
-                           return_mask=False):
+                           return_mask=False,
+                           **kwargs):
         start = time.time()
         if crop_infer:
             dt_boxes = self.text_detector.crop_infer(
                 img_numpy=img_numpy)[0]['boxes']
         else:
             det_res = self.text_detector(img_numpy=img_numpy,
-                                         return_mask=return_mask)[0]
+                                         return_mask=return_mask, **kwargs)[0]
             dt_boxes = det_res['boxes']
             # logger.info(dt_boxes)
         det_time_cost = time.time() - start
@@ -247,7 +248,8 @@ class OpenOCR(object):
                  img_numpy=None,
                  rec_batch_num=6,
                  crop_infer=False,
-                 return_mask=False):
+                 return_mask=False,
+                 **kwargs):
         """
         img_path: str, optional, default=None
             Path to the directory containing images or the image filename.
@@ -278,13 +280,15 @@ class OpenOCR(object):
                     ori_img=ori_img,
                     crop_infer=crop_infer,
                     rec_batch_num=rec_batch_num,
-                    return_mask=return_mask)
+                    return_mask=return_mask,
+                    **kwargs)
             else:
                 dt_boxes, rec_res, time_dict = self.infer_single_image(
                     img_numpy=img,
                     ori_img=ori_img,
                     crop_infer=crop_infer,
-                    rec_batch_num=rec_batch_num)
+                    rec_batch_num=rec_batch_num,
+                    **kwargs)
             if dt_boxes is None:
                 results.append([])
                 time_dicts.append({})
@@ -324,7 +328,8 @@ class OpenOCR(object):
                 img_numpy=img_numpy,
                 ori_img=ori_img,
                 crop_infer=crop_infer,
-                rec_batch_num=rec_batch_num)
+                rec_batch_num=rec_batch_num,
+                **kwargs)
             if dt_boxes is None:
                 res_list.append([])
                 time_dicts.append({})
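
End to end, this is the call pattern app.py now relies on: the detection knobs ride along as **kwargs from OpenOCR.__call__ down through OpenDetector to DBPostProcess. A hedged sketch follows; the no-argument OpenOCR() construction and the image path are assumptions, while the keyword names and the three-value return with return_mask=True come from app.py above.

# Assumes the repo root is on sys.path; construction details are assumptions.
import cv2
from tools.infer_e2e import OpenOCR

text_sys = OpenOCR()               # hypothetical default construction
img = cv2.imread('sample.jpg')     # hypothetical input image (BGR)
results, time_dict, mask = text_sys(img_numpy=img,
                                    return_mask=True,
                                    thresh=0.3,         # mask binarization
                                    box_thresh=0.6,     # box confidence filter
                                    unclip_ratio=1.5,   # box expansion
                                    score_mode='slow')  # slower but more accurate
print(results[0])                  # per-image recognition results
print(time_dict)                   # detection/recognition timing breakdown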