# -*- encoding: utf-8 -*- import json import time from pathlib import Path import cv2 import gradio as gr from rapidocr_onnxruntime import RapidOCR from utils import visualize font_dict = { 'ch': 'FZYTK.TTF', 'japan': 'japan.ttc', 'korean': 'korean.ttf', 'en': 'FZYTK.TTF', } def inference(img_path, box_thresh=0.5, unclip_ratio=1.6, text_score=0.5, text_det=None, text_rec=None): out_log_list = [] det_model_path = str(Path('models') / 'text_det' / text_det) rec_model_path = str(Path('models') / 'text_rec' / text_rec) if 'v2' in rec_model_path: rec_image_shape = [3, 32, 320] else: rec_image_shape = [3, 48, 320] out_log_list.append('Init Model') s = time.time() rapid_ocr = RapidOCR(det_model_path=det_model_path, rec_model_path=rec_model_path, rec_img_shape=rec_image_shape) elapse = time.time() - s if 'ch' in rec_model_path or 'en' in rec_model_path: lan_name = 'ch' elif 'japan' in rec_model_path: lan_name = 'japan' elif 'korean' in rec_model_path: lan_name = 'korean' else: lan_name = 'ch' out_log_list.append(f'Init Model cost: {elapse:.5f}') out_log_list.extend([f'det_model: {det_model_path}', f'rec_model: {rec_model_path}', f'rec_image_shape: {rec_image_shape}']) img = cv2.imread(img_path) ocr_result, infer_elapse = rapid_ocr(img, box_thresh=box_thresh, unclip_ratio=unclip_ratio, text_score=text_score) det_cost, cls_cost, rec_cost = infer_elapse out_log_list.extend([f'det cost: {det_cost:.5f}', f'cls cost: {cls_cost:.5f}', f'rec cost: {rec_cost:.5f}']) out_log = '\n'.join([str(v) for v in out_log_list]) if not ocr_result: return img_path, '未识别到有效文本', out_log dt_boxes, rec_res, scores = list(zip(*ocr_result)) font_path = Path('fonts') / font_dict.get(lan_name) img_save_path = visualize(img_path, dt_boxes, rec_res, scores, font_path=str(font_path)) # output_text = [f'{one_rec} {float(score):.4f}' # for one_rec, score in zip(rec_res, scores)] out_dict = {i: {'rec_txt': rec, 'score': score} for i, (rec, score) in enumerate(zip(rec_res, scores))} return img_save_path, out_dict, out_log if __name__ == '__main__': examples = [['images/1.jpg'], ['images/ch_en_num.jpg'], ['images/air_ticket.jpg'], ['images/car_plate.jpeg'], ['images/idcard.jpg'], ['images/train_ticket.jpeg'], ['images/japan_2.jpg'], ['images/korean_1.jpg']] with gr.Blocks(title='RapidOCR') as demo: gr.Markdown("""