3v324v23 committed on
Commit
b3722f6
·
1 Parent(s): 866dcbc
Files changed (2) hide show
  1. const.py +104 -0
  2. utils.py +127 -0
const.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Baseline option set that callers overlay with their own parameters before
# the merged mapping is handed to the OCR pipeline. Key order is kept stable.
default_config = {
    # --- runtime / device ---
    "use_gpu": False,
    "use_xpu": False,
    "use_npu": False,
    "ir_optim": True,
    "use_tensorrt": False,
    "min_subgraph_size": 15,
    "precision": "fp32",
    "gpu_mem": 500,
    "gpu_id": 0,

    # --- text detector ---
    "use_onnx": False,
    "page_num": 0,
    "det_algorithm": "DB",
    "det_limit_side_len": 960,
    "det_limit_type": "max",
    "det_box_type": "quad",

    # DB params
    "det_db_thresh": 0.3,
    "det_db_box_thresh": 0.6,
    "det_db_unclip_ratio": 1.5,
    "max_batch_size": 10,
    "use_dilation": False,
    "det_db_score_mode": "fast",

    # EAST params
    "det_east_score_thresh": 0.8,
    "det_east_cover_thresh": 0.1,
    "det_east_nms_thresh": 0.2,

    # SAST params
    "det_sast_score_thresh": 0.5,
    "det_sast_nms_thresh": 0.2,

    # PSE params
    "det_pse_thresh": 0,
    "det_pse_box_thresh": 0.85,
    "det_pse_min_area": 16,
    "det_pse_scale": 1,

    # FCE params
    "scales": [8, 16, 32],
    "alpha": 1.0,
    "beta": 1.0,
    "fourier_degree": 5,

    # --- text recognizer ---
    "rec_algorithm": "SVTR_LCNet",
    "rec_image_inverse": True,
    "rec_image_shape": "3, 48, 320",
    "rec_batch_num": 6,
    "max_text_length": 25,
    "rec_char_dict_path": "./paddle/ppocr_keys_v1.txt",
    "use_space_char": True,
    # "vis_font_path" is not set here (upstream value: "./doc/fonts/simfang.ttf").
    "drop_score": 0.5,

    # --- e2e / PGNet ---
    # No end-to-end keys are defined in this mapping. For reference, the
    # argparse defaults this section was adapted from were:
    #   e2e_algorithm='PGNet', e2e_model_dir=<unset>,
    #   e2e_limit_side_len=768, e2e_limit_type='max',
    #   e2e_pgnet_score_thresh=0.5,
    #   e2e_char_dict_path="./ppocr/utils/ic15_dict.txt",
    #   e2e_pgnet_valid_set='totaltext', e2e_pgnet_mode='fast'

    # --- text (angle) classifier ---
    "use_angle_cls": False,
    # "cls_model_dir" is not set here.
    "cls_image_shape": "3, 48, 192",
    "label_list": ["0", "180"],
    "cls_batch_num": 6,
    "cls_thresh": 0.9,

    # --- CPU / serving ---
    "enable_mkldnn": False,
    "cpu_threads": 10,
    "use_pdserving": False,
    "warmup": False,

    # SR params
    "sr_image_shape": "3, 32, 128",
    "sr_batch_num": 1,

    # --- output locations ---
    "draw_img_save_dir": "./inference_results",
    "save_crop_res": False,
    "crop_res_save_dir": "./output",

    # multi-process
    "use_mp": False,
    "total_process_num": 1,
    "process_id": 0,

    # --- logging / benchmarking ---
    "benchmark": False,
    "save_log_path": "./log_output/",

    "show_log": False,
}
utils.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+ import cv2
3
+ import os
4
+ import json
5
+ from paddleocr.tools.infer.predict_system import TextSystem
6
+ import matplotlib.image as mpimg
7
+ import matplotlib.pyplot as plt
8
+ from typing import Union, Any, Tuple, List, Optional, Dict
9
+ from PIL import Image, ImageDraw, ImageFont
10
+ import math
11
+ import numpy as np
12
+ from const import default_config
13
+
14
def draw_ocr_box_txt(image,
                     boxes,
                     txts,
                     scores=None,
                     show_score=False,
                     drop_score=0.5,
                     font_path="./fonts/simfang.ttf"):
    """Render OCR boxes and their text side by side.

    The left half of the result is ``image`` with every kept box filled in a
    semi-transparent colour; the right half is a white canvas with the box
    outlines and the recognised text drawn at the box positions.

    Args:
        image: PIL image to annotate (its ``width``/``height`` set the canvas).
        boxes: iterable of quadrilaterals; each box is indexed as four
            ``(x, y)`` corner points ``box[0]`` .. ``box[3]``.
        txts: recognised strings, parallel to ``boxes``.
        scores: optional confidences, parallel to ``boxes``. When ``None``,
            nothing is filtered and no score is appended to the text.
        show_score: append ``":<score>"`` to each text when a score exists.
        drop_score: boxes whose score is below this value are skipped.
        font_path: TrueType font file used to draw the text.

    Returns:
        ``numpy.ndarray`` built from an RGB image twice as wide as ``image``.
    """
    import random

    w, h = image.width, image.height
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    # A private generator seeded with 0 yields the same colour sequence on
    # every call without reseeding the process-wide ``random`` state.
    rng = random.Random(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)

    # ``scores`` is optional: pad with None so zip() still walks every box.
    if scores is None:
        scores = [None] * len(txts)

    fonts = {}  # font size -> loaded font, so each size is read from disk once

    for box, txt, score in zip(boxes, txts, scores):
        if score is not None and score < drop_score:
            continue

        color = (rng.randint(0, 255), rng.randint(0, 255),
                 rng.randint(0, 255))
        draw_left.polygon([(pt[0], pt[1]) for pt in box], fill=color)
        draw_right.polygon(
            [
                box[0][0], box[0][1], box[1][0], box[1][1],
                box[2][0], box[2][1], box[3][0], box[3][1],
            ],
            outline=color)

        # Edge lengths: corner 0 -> 3 is the box height, corner 0 -> 1 its width.
        box_height = math.hypot(box[0][0] - box[3][0], box[0][1] - box[3][1])
        box_width = math.hypot(box[0][0] - box[1][0], box[0][1] - box[1][1])

        if show_score and score is not None:
            txt = txt + ':' + str(score)

        # Tall, narrow boxes are treated as vertical text.
        vertical = box_height > 2 * box_width
        if vertical:
            font_size = max(int(box_width * 0.9), 10)
        else:
            font_size = max(int(box_height * 0.8), 10)

        font = fonts.get(font_size)
        if font is None:
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            fonts[font_size] = font

        if vertical:
            # Stack the characters top to bottom, advancing by glyph height.
            cur_y = box[0][1]
            for ch in txt:
                char_bbox = font.getbbox(ch)
                draw_right.text(
                    (box[0][0] + 3, cur_y), ch, fill=(0, 0, 0), font=font)
                cur_y += char_bbox[3] - char_bbox[1]
        else:
            draw_right.text(
                (box[0][0], box[0][1]), txt, fill=(0, 0, 0), font=font)

    # 50/50 blend keeps the source image visible underneath the filled boxes.
    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)
67
+
68
def draw_ocr_results(image_fp: Union[str, Path, Image.Image], ocr_outs, font_path):
    """Display an image next to its OCR results using matplotlib.

    Args:
        image_fp: path to an image file, or an already loaded PIL image.
            Paths are opened and converted to RGB; PIL images are used as-is.
        ocr_outs: iterable of results, each indexed as
            ``[box, (text, score)]``.
        font_path: TrueType font file forwarded to ``draw_ocr_box_txt``.

    Returns:
        None. The rendered figure is shown with ``plt.show()``.
    """
    # Credits: adapted from https://github.com/PaddlePaddle/PaddleOCR
    if isinstance(image_fp, (str, Path)):
        # convert() returns a fully loaded copy, so the file can be closed here.
        with Image.open(image_fp) as src:
            img = src.convert('RGB')
    else:
        img = image_fp

    boxes = [out[0] for out in ocr_outs]
    txts = [out[1][0] for out in ocr_outs]
    scores = [out[1][1] for out in ocr_outs]

    # drop_score=0.0 so every supplied result is drawn.
    draw_img = draw_ocr_box_txt(
        img, boxes, txts, scores, drop_score=0.0, font_path=font_path
    )

    # To write the rendering to disk instead of showing it:
    #   cv2.imwrite(out_draw_fp, draw_img[:, :, ::-1])

    # ndarray shape is (rows, cols, ...) = (height, width, ...), whereas
    # matplotlib's figsize is (width, height) in inches.
    height, width = draw_img.shape[:2]
    plt.figure(figsize=(width / 100, height / 100), dpi=100)
    plt.imshow(draw_img)
    plt.axis('off')  # Turn off axis numbers
    plt.show()
95
+
96
class MyObject:
    """Plain object whose attributes mirror the entries of a mapping."""

    def __init__(self, dictionary):
        """Copy every ``key -> value`` pair of ``dictionary`` onto ``self``."""
        for name, value in dictionary.items():
            setattr(self, name, value)
100
+
101
def json_to_class_obj(args):
    """Wrap the mapping ``args`` in a ``MyObject`` for attribute-style access."""
    wrapped = MyObject(args)
    return wrapped
103
+
104
def start_ocr(params):
    """Run the PaddleOCR ``TextSystem`` pipeline on one image or a directory.

    Args:
        params: mapping of options laid over ``default_config``. It must
            provide ``image_dir`` (a file path or a directory path); any
            other key overrides the default of the same name.
            NOTE(review): ``TextSystem`` presumably also needs model
            directory options that ``default_config`` does not define;
            confirm against the caller.

    Returns:
        A list with one entry per processed image. Each entry is a list of
        ``[box_as_nested_list, recognition_result]`` pairs, and is empty
        when the pipeline reports no detections.

    Raises:
        ValueError: ``image_dir`` is a single path that cannot be read as
            an image.
    """
    import warnings

    args = json_to_class_obj({
        **default_config,
        **params,
    })

    from_directory = os.path.isdir(args.image_dir)
    if from_directory:
        # os.listdir() order is arbitrary; sort so results are reproducible.
        image_paths = [os.path.join(args.image_dir, name)
                       for name in sorted(os.listdir(args.image_dir))]
    else:
        image_paths = [args.image_dir]

    text_system = TextSystem(args)

    results = []
    for path in image_paths:
        # Images are read one at a time so only one is held in memory.
        img = cv2.imread(path)
        if img is None:
            # cv2.imread returns None for anything it cannot decode, e.g.
            # sub-directories or non-image files.
            if not from_directory:
                raise ValueError(f"cannot read image: {path!r}")
            warnings.warn(f"start_ocr: skipping unreadable file {path!r}")
            continue

        dt_boxes, rec_res, _ = text_system(img, args.use_angle_cls)

        # Some PaddleOCR releases return None instead of empty sequences
        # when no text is detected; treat that as "no results".
        if dt_boxes is None or rec_res is None:
            results.append([])
            continue

        results.append([[box.tolist(), rec]
                        for box, rec in zip(dt_boxes, rec_res)])
    return results