"""Run PaddleOCR's ``TextSystem`` on images and render the results with PIL."""

import json
import math
import os
import random
import warnings
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union

import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
from paddleocr.tools.infer.predict_system import TextSystem
from PIL import Image, ImageDraw, ImageFont

from const import default_config


def draw_ocr_box_txt(image, boxes, txts, scores=None, show_score=False,
                     drop_score=0.5, font_path="./fonts/simfang.ttf"):
    """Render detected boxes and their text side by side.

    The left half is ``image`` blended 50/50 with colour-filled boxes; the
    right half is a white canvas of the same size with each box outlined in
    the same colour and its text drawn inside.

    Args:
        image: PIL image the boxes refer to.
        boxes: Sequence of quadrilaterals, each four ``(x, y)`` points.
        txts: Text for each box, in the same order as ``boxes``.
        scores: Optional score per box. When ``None``, no box is dropped.
        show_score: Append ``":<score>"`` to the text when a score exists.
        drop_score: Boxes whose score is below this value are skipped.
        font_path: TrueType font file used to draw the text.

    Returns:
        ``numpy`` array of the combined image, twice as wide as ``image``.
    """
    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    # Private, fixed-seed generator: colours are reproducible between calls
    # without reseeding the process-wide ``random`` state.
    rng = random.Random(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)

    # ``scores`` is optional; substitute placeholders so ``zip`` still pairs
    # every box with its text.
    if scores is None:
        scores = [None] * len(boxes)

    # Loading a TrueType font is comparatively expensive; reuse it per size.
    fonts = {}

    def _font(size):
        if size not in fonts:
            fonts[size] = ImageFont.truetype(
                font_path, size, encoding="utf-8")
        return fonts[size]

    for box, txt, score in zip(boxes, txts, scores):
        if score is not None and score < drop_score:
            continue

        color = (rng.randint(0, 255), rng.randint(0, 255),
                 rng.randint(0, 255))
        draw_left.polygon([(pt[0], pt[1]) for pt in box], fill=color)
        draw_right.polygon(
            [
                box[0][0], box[0][1], box[1][0], box[1][1],
                box[2][0], box[2][1], box[3][0], box[3][1],
            ],
            outline=color)

        # Edge lengths: point 0 -> point 3 is treated as the height,
        # point 0 -> point 1 as the width.
        box_height = math.hypot(box[0][0] - box[3][0], box[0][1] - box[3][1])
        box_width = math.hypot(box[0][0] - box[1][0], box[0][1] - box[1][1])

        if show_score and score is not None:
            txt = txt + ':' + str(score)

        if box_height > 2 * box_width:
            # Tall, narrow box: draw the text one character per row.
            font = _font(max(int(box_width * 0.9), 10))
            cur_y = box[0][1]
            for c in txt:
                char_bbox = font.getbbox(c)
                draw_right.text(
                    (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
                cur_y += char_bbox[3] - char_bbox[1]
        else:
            font = _font(max(int(box_height * 0.8), 10))
            draw_right.text(
                (box[0][0], box[0][1]), txt, fill=(0, 0, 0), font=font)

    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)


def draw_ocr_results(image_fp: Union[str, Path, Image.Image], ocr_outs,
                     font_path):
    """Draw one image's OCR output and return the rendering as an array.

    Credits: adapted from https://github.com/PaddlePaddle/PaddleOCR

    Args:
        image_fp: Path to an image (opened and converted to RGB) or an
            already loaded PIL image (used as is).
        ocr_outs: Iterable of ``[box, (text, score)]`` entries.
        font_path: TrueType font file used to draw the text.

    Returns:
        The array produced by ``draw_ocr_box_txt`` with ``drop_score=0.0``.
    """
    if isinstance(image_fp, (str, Path)):
        img = Image.open(image_fp).convert('RGB')
    else:
        img = image_fp

    # Materialise once so a one-shot iterable can be read three times.
    outs = list(ocr_outs)
    boxes = [out[0] for out in outs]
    txts = [out[1][0] for out in outs]
    scores = [out[1][1] for out in outs]

    return draw_ocr_box_txt(
        img, boxes, txts, scores, drop_score=0.0, font_path=font_path
    )


class MyObject:
    """Attribute-style view of a dict: each key becomes an attribute."""

    def __init__(self, dictionary):
        for key, value in dictionary.items():
            setattr(self, key, value)


def json_to_class_obj(args):
    """Wrap the mapping ``args`` in a ``MyObject``."""
    return MyObject(args)


def start_ocr(params):
    """Run OCR on a single image or on every entry of a directory.

    ``params`` is merged over ``default_config`` (``params`` wins) and the
    result is handed to ``TextSystem`` as its argument object. ``image_dir``
    in the merged config is either an image path or a directory.

    Returns:
        One list per input, in input order (``os.listdir`` order for a
        directory). Each inner list holds ``[box_as_list, rec]`` pairs,
        where ``rec`` is the recognition entry returned by ``TextSystem``.
        An input that cannot be read as an image, or in which no text is
        found, yields an empty list so results stay aligned with inputs.
    """
    args = json_to_class_obj({
        **default_config,
        **params,
    })

    if os.path.isdir(args.image_dir):
        paths = [os.path.join(args.image_dir, name)
                 for name in os.listdir(args.image_dir)]
    else:
        paths = [args.image_dir]

    ps = TextSystem(args)
    results = []
    for path in paths:
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None (sub-directory,
            # non-image file, missing path) instead of raising.
            warnings.warn(f"Could not read image, skipping OCR: {path}")
            results.append([])
            continue

        dt_boxes, rec_res, _ = ps(img, args.use_angle_cls)
        if dt_boxes is None or rec_res is None:
            # NOTE(review): recent PaddleOCR versions appear to return None
            # here when no text box is detected - confirm for pinned version.
            results.append([])
            continue

        results.append(
            [[box.tolist(), rec] for box, rec in zip(dt_boxes, rec_res)])
    return results