Spaces:
Sleeping
Sleeping
const.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Baseline option values for the OCR pipeline. utils.py imports this dict and
# merges caller-supplied parameters on top of it.
default_config = {
    # Device / inference-engine switches
    "use_gpu": False,
    "use_xpu": False,
    "use_npu": False,
    "ir_optim": True,
    "use_tensorrt": False,
    "min_subgraph_size": 15,
    "precision": "fp32",
    "gpu_mem": 500,
    "gpu_id": 0,

    # Text detector
    "use_onnx": False,
    "page_num": 0,
    "det_algorithm": "DB",
    "det_limit_side_len": 960,
    "det_limit_type": "max",
    "det_box_type": "quad",

    # DB params
    "det_db_thresh": 0.3,
    "det_db_box_thresh": 0.6,
    "det_db_unclip_ratio": 1.5,
    "max_batch_size": 10,
    "use_dilation": False,
    "det_db_score_mode": "fast",

    # EAST params
    "det_east_score_thresh": 0.8,
    "det_east_cover_thresh": 0.1,
    "det_east_nms_thresh": 0.2,

    # SAST params
    "det_sast_score_thresh": 0.5,
    "det_sast_nms_thresh": 0.2,

    # PSE params
    "det_pse_thresh": 0,
    "det_pse_box_thresh": 0.85,
    "det_pse_min_area": 16,
    "det_pse_scale": 1,

    # FCE params
    "scales": [8, 16, 32],
    "alpha": 1.0,
    "beta": 1.0,
    "fourier_degree": 5,

    # Text recognizer params
    "rec_algorithm": "SVTR_LCNet",
    "rec_image_inverse": True,
    "rec_image_shape": "3, 48, 320",
    "rec_batch_num": 6,
    "max_text_length": 25,
    "rec_char_dict_path": "./paddle/ppocr_keys_v1.txt",
    "use_space_char": True,
    # "vis_font_path" is deliberately left unset here
    # (previously "./doc/fonts/simfang.ttf").
    "drop_score": 0.5,

    # End-to-end (e2e_algorithm, e2e_model_dir, e2e_limit_side_len,
    # e2e_limit_type) and PGNet (e2e_pgnet_score_thresh, e2e_char_dict_path,
    # e2e_pgnet_valid_set, e2e_pgnet_mode) options are not configured here.

    # Text classifier params ("cls_model_dir" is not given a default)
    "use_angle_cls": False,
    "cls_image_shape": "3, 48, 192",
    "label_list": ["0", "180"],
    "cls_batch_num": 6,
    "cls_thresh": 0.9,

    "enable_mkldnn": False,
    "cpu_threads": 10,
    "use_pdserving": False,
    "warmup": False,

    # SR params
    "sr_image_shape": "3, 32, 128",
    "sr_batch_num": 1,

    "draw_img_save_dir": "./inference_results",
    "save_crop_res": False,
    "crop_res_save_dir": "./output",

    # Multi-process
    "use_mp": False,
    "total_process_num": 1,
    "process_id": 0,

    "benchmark": False,
    "save_log_path": "./log_output/",

    "show_log": False,
}
|
utils.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
import cv2
|
3 |
+
import os
|
4 |
+
import json
|
5 |
+
from paddleocr.tools.infer.predict_system import TextSystem
|
6 |
+
import matplotlib.image as mpimg
|
7 |
+
import matplotlib.pyplot as plt
|
8 |
+
from typing import Union, Any, Tuple, List, Optional, Dict
|
9 |
+
from PIL import Image, ImageDraw, ImageFont
|
10 |
+
import math
|
11 |
+
import numpy as np
|
12 |
+
from const import default_config
|
13 |
+
|
14 |
+
def draw_ocr_box_txt(image,
                     boxes,
                     txts,
                     scores=None,
                     show_score=False,
                     drop_score=0.5,
                     font_path="./fonts/simfang.ttf"):
    """Render OCR boxes and their text next to each other.

    The left half of the result is ``image`` with every kept box filled in a
    random colour and blended 50/50 with the original; the right half is a
    white canvas with the box outlines and the recognised text drawn inside.

    Args:
        image: PIL image to annotate (it is copied, not modified).
        boxes: Iterable of 4-point boxes, each indexable as ``box[i][0]`` /
            ``box[i][1]`` for the x / y coordinate of corner ``i``.
        txts: Iterable of strings, parallel to ``boxes``.
        scores: Optional iterable of scores parallel to ``boxes``. When
            ``None`` no box is dropped and no score is appended to the text.
        show_score: Append ``":<score>"`` to each text when a score exists.
        drop_score: Boxes whose score is below this value are skipped.
        font_path: TrueType font file used to draw the text.

    Returns:
        A numpy array built from an RGB canvas of size ``(2 * w, h)``.
    """
    import random
    from itertools import repeat

    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    # A private generator seeded with 0 yields the same colour sequence as
    # seeding the module-level RNG, without resetting global random state
    # for the rest of the program.
    rng = random.Random(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)

    # ``scores`` defaults to None; zip() cannot iterate None, so pair every
    # box with a None score in that case.
    score_iter = repeat(None) if scores is None else scores

    # Many boxes share a font size; load each size from disk only once.
    font_cache = {}

    def _font(size):
        if size not in font_cache:
            font_cache[size] = ImageFont.truetype(
                font_path, size, encoding="utf-8")
        return font_cache[size]

    for box, txt, score in zip(boxes, txts, score_iter):
        if score is not None and score < drop_score:
            continue
        color = (rng.randint(0, 255), rng.randint(0, 255),
                 rng.randint(0, 255))
        draw_left.polygon([(pt[0], pt[1]) for pt in box], fill=color)
        draw_right.polygon(
            [
                box[0][0], box[0][1], box[1][0], box[1][1], box[2][0],
                box[2][1], box[3][0], box[3][1]
            ],
            outline=color)
        # Edge lengths: corner 0 -> corner 3 and corner 0 -> corner 1.
        box_height = math.sqrt((box[0][0] - box[3][0])**2 +
                               (box[0][1] - box[3][1])**2)
        box_width = math.sqrt((box[0][0] - box[1][0])**2 +
                              (box[0][1] - box[1][1])**2)
        if show_score and score is not None:
            txt = txt + ':' + str(score)
        if box_height > 2 * box_width:
            # Tall, narrow box: draw the characters one below another.
            font = _font(max(int(box_width * 0.9), 10))
            cur_y = box[0][1]
            for c in txt:
                char_bbox = font.getbbox(c)
                draw_right.text(
                    (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
                cur_y += char_bbox[3] - char_bbox[1]
        else:
            font = _font(max(int(box_height * 0.8), 10))
            draw_right.text(
                (box[0][0], box[0][1]), txt, fill=(0, 0, 0), font=font)
    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)
|
67 |
+
|
68 |
+
def draw_ocr_results(image_fp: Union[str, Path, Image.Image], ocr_outs, font_path):
    """Display an image together with its OCR boxes and texts.

    Args:
        image_fp: Path to an image file (opened and converted to RGB) or an
            already loaded PIL image (used as is).
        ocr_outs: Iterable of results, each indexable as
            ``[box, (text, score)]``.
        font_path: TrueType font file handed to ``draw_ocr_box_txt``.

    The rendered picture is shown with matplotlib; nothing is returned.
    """
    # Credits: adapted from https://github.com/PaddlePaddle/PaddleOCR
    if isinstance(image_fp, (str, Path)):
        img = Image.open(image_fp).convert('RGB')
    else:
        img = image_fp

    txts = []
    scores = []
    boxes = []
    for _out in ocr_outs:
        box, (txt, score) = _out[0], (_out[1][0], _out[1][1])
        boxes.append(box)
        txts.append(txt)
        scores.append(score)

    draw_img = draw_ocr_box_txt(
        img, boxes, txts, scores, drop_score=0.0, font_path=font_path
    )

    # draw_img.shape is (rows, cols, channels) while figsize expects
    # (width, height) in inches, so the columns come first. At dpi=100 the
    # figure then matches the rendered image pixel for pixel.
    img_h, img_w = draw_img.shape[0], draw_img.shape[1]
    plt.figure(figsize=(img_w / 100, img_h / 100), dpi=100)
    plt.imshow(draw_img)
    plt.axis('off')  # Turn off axis numbers
    plt.show()
|
95 |
+
|
96 |
+
class MyObject:
    """Attribute-style view of a dictionary.

    Every key of the dictionary passed to the constructor becomes an
    instance attribute holding the corresponding value.
    """

    def __init__(self, dictionary):
        for name, value in dictionary.items():
            setattr(self, name, value)
|
100 |
+
|
101 |
+
def json_to_class_obj(args):
    """Return a ``MyObject`` exposing the keys of ``args`` as attributes."""
    wrapped = MyObject(args)
    return wrapped
|
103 |
+
|
104 |
+
def start_ocr(params):
    """Run the OCR pipeline on one image or on every image in a directory.

    Args:
        params: Mapping of option overrides layered on top of
            ``default_config``. It must provide ``image_dir``, which is
            either a directory or the path of a single image file.

    Returns:
        A list with one entry per processed image. Each entry is a list of
        ``[box_as_nested_list, recognition_result]`` pairs, and is empty when
        the pipeline reports no boxes. In directory mode, entries that
        OpenCV cannot decode (non-image files, sub-directories) are skipped,
        so the list may be shorter than the directory listing.

    Raises:
        ValueError: ``image_dir`` is a single path that cannot be read as an
            image.
    """
    args = json_to_class_obj({
        **default_config,
        **params,
    })

    is_dir = os.path.isdir(args.image_dir)
    if is_dir:
        # os.listdir order is arbitrary; sort so results are reproducible.
        image_paths = [
            os.path.join(args.image_dir, name)
            for name in sorted(os.listdir(args.image_dir))
        ]
    else:
        image_paths = [args.image_dir]

    ps = TextSystem(args)

    results = []
    # Decode and process one image at a time rather than holding every
    # decoded image in memory before the first prediction.
    for path in image_paths:
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            if is_dir:
                continue
            raise ValueError("Could not read image: {!r}".format(path))

        dt_boxes, rec_res, _ = ps(img, args.use_angle_cls)

        # Guard against the pipeline returning None when nothing is found.
        if dt_boxes is None or rec_res is None:
            results.append([])
            continue

        results.append([[box.tolist(), rec]
                        for box, rec in zip(dt_boxes, rec_res)])
    return results
|