Spaces:

banned-historical-archives
/

bha-ocr

Sleeping

App Files Files Community

3v324v23 committed on Dec 22, 2023

Commit

b3722f6

1 Parent(s): 866dcbc

1

Browse files

Files changed (2) hide show

const.py +104 -0
utils.py +127 -0

const.py ADDED Viewed

	@@ -0,0 +1,104 @@

+# Baseline option set for the OCR pipeline. start_ocr() in utils.py overlays
+# the caller's params on top of this mapping, so every value here is only a
+# default. Key order is preserved from the original definition.
+default_config = {
+    "use_gpu": False,
+    "use_xpu": False,
+    "use_npu": False,
+    "ir_optim": True,
+    "use_tensorrt": False,
+    "min_subgraph_size": 15,
+    "precision": "fp32",
+    "gpu_mem": 500,
+    "gpu_id": 0,
+    "use_onnx": False,
+    "page_num": 0,
+    "det_algorithm": "DB",
+    "det_limit_side_len": 960,
+    "det_limit_type": "max",
+    "det_box_type": "quad",
+    # DB params
+    "det_db_thresh": 0.3,
+    "det_db_box_thresh": 0.6,
+    "det_db_unclip_ratio": 1.5,
+    "max_batch_size": 10,
+    "use_dilation": False,
+    "det_db_score_mode": "fast",
+    # EAST params
+    "det_east_score_thresh": 0.8,
+    "det_east_cover_thresh": 0.1,
+    "det_east_nms_thresh": 0.2,
+    # SAST params
+    "det_sast_score_thresh": 0.5,
+    "det_sast_nms_thresh": 0.2,
+    # PSE params
+    "det_pse_thresh": 0,
+    "det_pse_box_thresh": 0.85,
+    "det_pse_min_area": 16,
+    "det_pse_scale": 1,
+    # FCE params
+    "scales": [8, 16, 32],
+    "alpha": 1.0,
+    "beta": 1.0,
+    "fourier_degree": 5,
+    # Text recognizer params
+    "rec_algorithm": "SVTR_LCNet",
+    "rec_image_inverse": True,
+    "rec_image_shape": "3, 48, 320",
+    "rec_batch_num": 6,
+    "max_text_length": 25,
+    "rec_char_dict_path": "./paddle/ppocr_keys_v1.txt",
+    "use_space_char": True,
+    # Not set here (kept for reference): vis_font_path="./doc/fonts/simfang.ttf"
+    "drop_score": 0.5,
+    # End-to-end (e2e / PGNet) options are not set here. Names and defaults,
+    # kept for reference: e2e_algorithm='PGNet', e2e_model_dir,
+    # e2e_limit_side_len=768, e2e_limit_type='max',
+    # e2e_pgnet_score_thresh=0.5,
+    # e2e_char_dict_path="./ppocr/utils/ic15_dict.txt",
+    # e2e_pgnet_valid_set='totaltext', e2e_pgnet_mode='fast'.
+    # Text classifier params ("cls_model_dir" is not set here)
+    "use_angle_cls": False,
+    "cls_image_shape": "3, 48, 192",
+    "label_list": ["0", "180"],
+    "cls_batch_num": 6,
+    "cls_thresh": 0.9,
+    "enable_mkldnn": False,
+    "cpu_threads": 10,
+    "use_pdserving": False,
+    "warmup": False,
+    # SR params
+    "sr_image_shape": "3, 32, 128",
+    "sr_batch_num": 1,
+    "draw_img_save_dir": "./inference_results",
+    "save_crop_res": False,
+    "crop_res_save_dir": "./output",
+    # Multi-process params
+    "use_mp": False,
+    "total_process_num": 1,
+    "process_id": 0,
+    "benchmark": False,
+    "save_log_path": "./log_output/",
+    "show_log": False,
+}

utils.py ADDED Viewed

	@@ -0,0 +1,127 @@

+from pathlib import Path
+import cv2
+import os
+import json
+from paddleocr.tools.infer.predict_system import TextSystem
+import matplotlib.image as mpimg
+import matplotlib.pyplot as plt
+from typing import Union, Any, Tuple, List, Optional, Dict
+from PIL import Image, ImageDraw, ImageFont
+import math
+import numpy as np
+from const import default_config
+def draw_ocr_box_txt(image,
+                     boxes,
+                     txts,
+                     scores=None,
+                     show_score=False,
+                     drop_score=0.5,
+                     font_path="./fonts/simfang.ttf"):
+    """Render OCR boxes next to their recognised text.
+
+    The left half of the result is ``image`` blended 50/50 with a copy on
+    which every kept box is filled with a random colour; the right half is a
+    white canvas holding the box outlines and the text drawn at each box.
+
+    Args:
+        image: PIL image to annotate; it is copied, not modified.
+        boxes: Iterable of quadrilaterals, each indexable as four
+            ``(x, y)`` points.
+        txts: Text for each box, in the same order as ``boxes``.
+        scores: Optional per-box scores. When ``None`` no box is filtered
+            and no score is appended to the text.
+        show_score: Append ``":<score>"`` to each text when a score exists.
+        drop_score: Boxes whose score is below this value are skipped.
+        font_path: TrueType font file used to draw the text.
+
+    Returns:
+        ``numpy.ndarray`` built from the side-by-side image, which is twice
+        as wide as ``image``.
+    """
+    import random
+    from itertools import repeat
+    # Private generator seeded with 0: same colour sequence on every call,
+    # without reseeding the process-wide random module.
+    rng = random.Random(0)
+    # Without scores, pair every box with None so the loop below still runs.
+    if scores is None:
+        scores = repeat(None)
+    h, w = image.height, image.width
+    img_left = image.copy()
+    img_right = Image.new('RGB', (w, h), (255, 255, 255))
+    draw_left = ImageDraw.Draw(img_left)
+    draw_right = ImageDraw.Draw(img_right)
+    # Fonts are cached per size so the font file is not reloaded for each box.
+    font_cache = {}
+    def _font(size):
+        if size not in font_cache:
+            font_cache[size] = ImageFont.truetype(
+                font_path, size, encoding="utf-8")
+        return font_cache[size]
+    for box, txt, score in zip(boxes, txts, scores):
+        if score is not None and score < drop_score:
+            continue
+        color = (rng.randint(0, 255), rng.randint(0, 255),
+                 rng.randint(0, 255))
+        draw_left.polygon([(pt[0], pt[1]) for pt in box], fill=color)
+        draw_right.polygon(
+            [
+                box[0][0], box[0][1], box[1][0], box[1][1], box[2][0],
+                box[2][1], box[3][0], box[3][1]
+            ],
+            outline=color)
+        # Edge point0->point3 is treated as the height, point0->point1 as
+        # the width.
+        box_height = math.sqrt((box[0][0] - box[3][0])**2 +
+                               (box[0][1] - box[3][1])**2)
+        box_width = math.sqrt((box[0][0] - box[1][0])**2 +
+                              (box[0][1] - box[1][1])**2)
+        if show_score and score is not None:
+            txt = txt + ':' + str(score)
+        if box_height > 2 * box_width:
+            # Tall, narrow box: draw the text one character per row.
+            font = _font(max(int(box_width * 0.9), 10))
+            cur_y = box[0][1]
+            for c in txt:
+                char_bbox = font.getbbox(c)
+                draw_right.text(
+                    (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
+                cur_y += char_bbox[3] - char_bbox[1]
+        else:
+            font = _font(max(int(box_height * 0.8), 10))
+            draw_right.text(
+                (box[0][0], box[0][1]), txt, fill=(0, 0, 0), font=font)
+    img_left = Image.blend(image, img_left, 0.5)
+    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
+    img_show.paste(img_left, (0, 0, w, h))
+    img_show.paste(img_right, (w, 0, w * 2, h))
+    return np.array(img_show)
+def draw_ocr_results(image_fp: Union[str, Path, Image.Image], ocr_outs, font_path):
+    """Display an OCR result in a matplotlib figure.
+
+    Args:
+        image_fp: Path of the image (opened and converted to RGB) or an
+            already loaded PIL image, which is used as-is.
+        ocr_outs: Iterable of per-box results; each item is indexed as
+            ``item[0]`` (box) and ``item[1][0]`` / ``item[1][1]``
+            (text / score).
+        font_path: TrueType font file passed on to ``draw_ocr_box_txt``.
+
+    Returns:
+        None. The figure is shown with ``plt.show()``.
+    """
+    # Credits: adapted from https://github.com/PaddlePaddle/PaddleOCR
+    if isinstance(image_fp, (str, Path)):
+        img = Image.open(image_fp).convert('RGB')
+    else:
+        img = image_fp
+    txts = []
+    scores = []
+    boxes = []
+    for _out in ocr_outs:
+        boxes.append(_out[0])
+        txts.append(_out[1][0])
+        scores.append(_out[1][1])
+    # drop_score=0.0 so that no box is filtered out of the visualisation.
+    draw_img = draw_ocr_box_txt(
+        img, boxes, txts, scores, drop_score=0.0, font_path=font_path
+    )
+    # The array is (rows, cols, ...) = (height, width, ...), while figsize
+    # expects (width, height) in inches; at dpi=100 this is one pixel per dot.
+    height, width = draw_img.shape[0], draw_img.shape[1]
+    plt.figure(figsize=(width / 100, height / 100), dpi=100)
+    plt.imshow(draw_img)
+    plt.axis('off')  # Turn off axis numbers
+    plt.show()
+class MyObject:
+    """Attribute-style view of a mapping: each key becomes an attribute."""
+    def __init__(self, dictionary):
+        """Copy every ``key -> value`` pair of ``dictionary`` onto ``self``."""
+        for name in dictionary:
+            value = dictionary[name]
+            setattr(self, name, value)
+def json_to_class_obj(args):
+    """Wrap the mapping ``args`` in a ``MyObject`` and return it."""
+    wrapped = MyObject(args)
+    return wrapped
+def start_ocr(params):
+    """Run the PaddleOCR ``TextSystem`` on one image or a directory of images.
+
+    Args:
+        params: Mapping of options merged over ``default_config``. It must
+            provide ``image_dir``, the path of an image file or of a
+            directory of images. Model locations are presumably also
+            required by ``TextSystem`` - confirm against PaddleOCR.
+
+    Returns:
+        A list with one entry per image that could be read. Each entry is a
+        list of ``[box, rec]`` pairs, where ``box`` is the detected box
+        converted with ``.tolist()`` and ``rec`` is the matching element of
+        the recognition result. Directory entries are processed in sorted
+        name order; entries OpenCV cannot decode are skipped.
+
+    Raises:
+        ValueError: If ``image_dir`` is not a directory and cannot be read
+            as an image.
+    """
+    args = json_to_class_obj({
+        **default_config,
+        **params,
+    })
+    image_dir = args.image_dir
+    imgs = []
+    if os.path.isdir(image_dir):
+        # os.listdir order is arbitrary; sort for a reproducible result order.
+        for name in sorted(os.listdir(image_dir)):
+            img = cv2.imread(os.path.join(image_dir, name))
+            # imread yields None for anything it cannot decode (non-image
+            # files, sub-directories); skip those instead of passing None on.
+            if img is not None:
+                imgs.append(img)
+    else:
+        img = cv2.imread(image_dir)
+        if img is None:
+            raise ValueError(f"cannot read image: {image_dir}")
+        imgs.append(img)
+    text_system = TextSystem(args)
+    results = []
+    for img in imgs:
+        dt_boxes, rec_res, _ = text_system(img, args.use_angle_cls)
+        # NOTE(review): TextSystem appears to return None instead of an
+        # empty list when nothing is detected (version-dependent) - confirm.
+        if dt_boxes is None or rec_res is None:
+            results.append([])
+            continue
+        results.append([[box.tolist(), rec]
+                        for box, rec in zip(dt_boxes, rec_res)])
+    return results