Cyril666 committed on
Commit
1514280
·
1 Parent(s): 12c0c35

First model version

Browse files
Files changed (1) hide show
  1. app.py +34 -8
app.py CHANGED
@@ -10,20 +10,46 @@ import glob
10
  import gradio as gr
11
  from demo import get_model, preprocess, postprocess, load
12
  from utils import Config, Logger, CharsetMapper
13
- from accelerate import Accelerator
14
-
15
- accelerator = Accelerator()
16
- device = accelerator.device
17
 
18
  def process_image(image):
 
19
  config = Config('configs/rec/train_abinet.yaml')
20
  config.model_vision_checkpoint = None
21
  model = get_model(config)
22
- model = load(model, 'workdir/train-abinet/best-train-abinet.pth').to(device)
23
  charset = CharsetMapper(filename=config.dataset_charset_path, max_length=config.dataset_max_length + 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
 
25
  img = image.convert('RGB')
26
- img = preprocess(img, config.dataset_image_width, config.dataset_image_height).to(device)
27
  res = model(img)
28
  return postprocess(res, charset, 'alignment')[0][0]
29
 
@@ -32,8 +58,8 @@ description = "西北工业大学航海学院张博强毕设,目前识别部
32
  #article = "<p style='text-align: center'><a href='https://arxiv.org/pdf/2103.06495.pdf'>Read Like Humans: Autonomous, Bidirectional and Iterative Language Modeling for Scene Text Recognition</a> | <a href='https://github.com/FangShancheng/ABINet'>Github Repo</a></p>"
33
 
34
  iface = gr.Interface(fn=process_image,
35
- inputs=[gr.inputs.Image(type="pil")],
36
- outputs=[gr.outputs.Textbox()],
37
  title=title,
38
  description=description,
39
  examples=glob.glob('figs/test/*.png'))
 
10
  import gradio as gr
11
  from demo import get_model, preprocess, postprocess, load
12
  from utils import Config, Logger, CharsetMapper
 
 
 
 
13
 
14
  def process_image(image):
15
+ # rec model
16
  config = Config('configs/rec/train_abinet.yaml')
17
  config.model_vision_checkpoint = None
18
  model = get_model(config)
19
+ model = load(model, 'workdir/train-abinet/best-train-abinet.pth')
20
  charset = CharsetMapper(filename=config.dataset_charset_path, max_length=config.dataset_max_length + 1)
21
+
22
+ # det model
23
+ cfg.merge_from_file('./configs/det/r50_baseline.yaml')
24
+ cfg.merge_from_list(["MODEL.DEVICE", "cpu"])
25
+ det_demo = DetDemo(
26
+ cfg,
27
+ min_image_size=800,
28
+ confidence_threshold=0.7,
29
+ output_polygon=True
30
+ )
31
+
32
+ # detect
33
+ image = cv2.imread(filepath)
34
+ result_polygons, result_masks, result_boxes = det_demo.run_on_opencv_image(image)
35
+
36
+ # cut patch
37
+ patchs = [image[box[1]:box[3], box[0]:box[2], :] for box in result_boxes]
38
+ patchs = [preprocess(patch, config.dataset_image_width, config.dataset_image_height) for patch in patchs]
39
+ patchs = torch.stack(patchs, dim=0)
40
+ print(patchs.shape)
41
+ res = model(patchs)
42
+ rec_result = postprocess(res, charset, 'alignment')[0]
43
+ print(rec_result)
44
+
45
+ # visual detect results
46
+ visual_image = det_demo.visualization(image.copy(), result_polygons, result_masks, result_boxes)
47
+ cv2.imwrite('result.jpg', visual_image)
48
+ return 'result.jpg'#, pd.DataFrame(result_words)
49
+
50
 
51
  img = image.convert('RGB')
52
+ img = preprocess(img, config.dataset_image_width, config.dataset_image_height)
53
  res = model(img)
54
  return postprocess(res, charset, 'alignment')[0][0]
55
 
 
58
  #article = "<p style='text-align: center'><a href='https://arxiv.org/pdf/2103.06495.pdf'>Read Like Humans: Autonomous, Bidirectional and Iterative Language Modeling for Scene Text Recognition</a> | <a href='https://github.com/FangShancheng/ABINet'>Github Repo</a></p>"
59
 
60
  iface = gr.Interface(fn=process_image,
61
+ inputs=[gr.inputs.Image(label="image", type="filepath")],
62
+ outputs=[gr.outputs.Image(), gr.outputs.Textbox()],
63
  title=title,
64
  description=description,
65
  examples=glob.glob('figs/test/*.png'))