# -*- encoding: utf-8 -*-
import os
os.system('pip install -r requirements.txt')
import math
import random
import time
from pathlib import Path
import cv2
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from rapidocr_onnxruntime import RapidOCR


def draw_ocr_box_txt(image, boxes, txts, font_path,
                     scores=None, text_score=0.5):
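    """Render OCR results side by side: the original image with translucent
    colored polygons on the left, and the recognized text drawn roughly at the
    position and scale of each box on the right.

    Boxes whose score is below `text_score` are skipped. Tall boxes
    (height > 2 * width) are drawn character by character, top to bottom.
    Note: `font.getsize` is only available in Pillow < 10.
    """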
    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    random.seed(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)
    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and float(scores[idx]) < text_score:
            continue

        color = (random.randint(0, 255),
                 random.randint(0, 255),
                 random.randint(0, 255))

        box = [tuple(v) for v in box]
        draw_left.polygon(box, fill=color)
        draw_right.polygon([box[0][0], box[0][1],
                            box[1][0], box[1][1],
                            box[2][0], box[2][1],
                            box[3][0], box[3][1]],
                           outline=color)

        box_height = math.sqrt((box[0][0] - box[3][0])**2
                               + (box[0][1] - box[3][1])**2)
        box_width = math.sqrt((box[0][0] - box[1][0])**2
                              + (box[0][1] - box[1][1])**2)
        if box_height > 2 * box_width:
            font_size = max(int(box_width * 0.9), 10)
            font = ImageFont.truetype(font_path, font_size,
                                      encoding="utf-8")
            cur_y = box[0][1]
            for c in txt:
                char_size = font.getsize(c)
                draw_right.text((box[0][0] + 3, cur_y), c,
                                fill=(0, 0, 0), font=font)
                cur_y += char_size[1]
        else:
            font_size = max(int(box_height * 0.8), 10)
            font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
            draw_right.text([box[0][0], box[0][1]], txt,
                            fill=(0, 0, 0), font=font)

    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)


def visualize(image_path, boxes, txts, scores,
              font_path="./FZYTK.TTF"):
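    """Draw the OCR boxes and texts onto the image at `image_path` and save
    the visualization under ./inference_results/ with a timestamped file name.
    Returns the path of the saved image.
    """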
    image = Image.open(image_path)
    draw_img = draw_ocr_box_txt(image, boxes,
                                txts, font_path,
                                scores,
                                text_score=0.5)

    draw_img_save = Path("./inference_results/")
    if not draw_img_save.exists():
        draw_img_save.mkdir(parents=True, exist_ok=True)

    time_stamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime(time.time()))
    image_save = str(draw_img_save / f'{time_stamp}_{Path(image_path).name}')
    cv2.imwrite(image_save, draw_img[:, :, ::-1])
    return image_save


def inference(img_path, box_thresh=0.5, unclip_ratio=1.6, text_score=0.5):
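    """Gradio handler: run RapidOCR on the uploaded image and return the path
    of the rendered visualization plus one 'text score' line per detection."""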
    img = cv2.imread(img_path)
    ocr_result, _ = rapid_ocr(img, box_thresh=box_thresh,
                              unclip_ratio=unclip_ratio,
                              text_score=text_score)
    dt_boxes, rec_res, scores = list(zip(*ocr_result))

    img_save_path = visualize(img_path, dt_boxes, rec_res, scores)
    output_text = [f'{one_rec} {float(score):.4f}'
                   for one_rec, score in zip(rec_res, scores)]
    return img_save_path, output_text


title = 'RapidOCR Demo (捷智OCR)'
description = """Github Repo: [RapidOCR](https://github.com/RapidAI/RapidOCR)
Docs: [Docs](https://rapidocr.rtfd.io/)
Parameters docs: [link](https://github.com/RapidAI/RapidOCR/tree/main/python#configyaml%E4%B8%AD%E5%B8%B8%E7%94%A8%E5%8F%82%E6%95%B0%E4%BB%8B%E7%BB%8D)
box_thresh: Probability that a detected box contains text; the larger the value, the more likely the box is text. Lower this value if text regions are missed. Range: [0, 1.0]
unclip_ratio: Controls the size of the detected text boxes; the larger the value, the larger the boxes. Increase this value if boxes cut off parts of the text. Range: [1.5, 2.0]
text_score: Confidence that a recognition result is correct; the larger the value, the more reliable the displayed results. Lower this value if results are missing. Range: [0, 1.0]
"""
article = """<p style='text-align: center'> Completely open source, free and support offline deployment of multi-platform and multi-language OCR SDK <a href='https://github.com/RapidAI/RapidOCR'>Github Repo</a></p>
<p align="left">
<a href="https://rapidai.deepdatasec.com:9003/" target="_blank"><img src="https://img.shields.io/badge/%E2%9A%A1%EF%B8%8E-Online%20Demo-blue"></a>
<a href="https://huggingface.co/spaces/SWHL/RapidOCRDemo" target="_blank"><img src="https://img.shields.io/badge/%F0%9F%A4%97-Hugging Face Demo-blue"></a>
<a href="https://colab.research.google.com/github/RapidAI/RapidOCR/blob/main/assets/RapidOCRDemo.ipynb" target="_blank"><img src="https://raw.githubusercontent.com/RapidAI/RapidOCR/main/assets/colab-badge.svg" alt="Open in Colab"></a>
<a href="https://aistudio.baidu.com/aistudio/projectdetail/4444785?sUid=57084&shared=1&ts=1660896122332" target="_blank"><img src="https://img.shields.io/badge/PP-Open in AI Studio-blue.svg"></a><br/>
<a href=""><img src="https://img.shields.io/badge/Python->=3.7,<=3.10-aff.svg"></a>
<a href=""><img src="https://img.shields.io/badge/OS-Linux%2C%20Win%2C%20Mac-pink.svg"></a>
<a href="https://github.com/RapidAI/RapidOCR/graphs/contributors"><img src="https://img.shields.io/github/contributors/RapidAI/RapidOCR?color=9ea"></a>
<a href="https://pepy.tech/project/rapidocr_onnxruntime"><img src="https://static.pepy.tech/personalized-badge/rapidocr_onnxruntime?period=total&units=abbreviation&left_color=grey&right_color=blue&left_text=Downloads%20Ort"></a>
<a href="https://pypi.org/project/rapidocr-onnxruntime/"><img alt="PyPI" src="https://img.shields.io/pypi/v/rapidocr-onnxruntime"></a>
<a href="https://github.com/RapidAI/RapidOCR/stargazers"><img src="https://img.shields.io/github/stars/RapidAI/RapidOCR?color=ccf"></a>
<a href="https://semver.org/"><img alt="SemVer2.0" src="https://img.shields.io/badge/SemVer-2.0-brightgreen"></a>
<a href='https://rapidocr.readthedocs.io/en/latest/?badge=latest'>
<img src='https://readthedocs.org/projects/rapidocr/badge/?version=latest' alt='Documentation Status' />
</a>
</p>
"""
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"
examples = [['images/1.jpg']]
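# A single RapidOCR (ONNXRuntime) instance is created once at startup and
# reused by every call to inference().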
rapid_ocr = RapidOCR()
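# Note: gr.inputs.*, gr.outputs.* and enable_queue below are the legacy
# Gradio 3.x style API; newer Gradio releases use gr.Image/gr.Textbox and
# Interface.queue() instead.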
gr.Interface(
    inference,
    inputs=[
        gr.inputs.Image(type='filepath', label='Input'),
        gr.Slider(minimum=0, maximum=1.0, value=0.5,
                  label='box_thresh', step=0.1,
                  info='Probability that a detected box contains text; the larger the value, the more likely the box is text. Lower this value if text regions are missed. Range: [0, 1.0]'),
        gr.Slider(minimum=1.5, maximum=2.0, value=1.6,
                  label='unclip_ratio', step=0.1,
                  info='Controls the size of the detected text boxes; the larger the value, the larger the boxes. Increase this value if boxes cut off parts of the text. Range: [1.5, 2.0]'),
        gr.Slider(minimum=0, maximum=1.0, value=0.5,
                  label='text_score', step=0.1,
                  info='Confidence that a recognition result is correct; the larger the value, the more reliable the displayed results. Lower this value if results are missing. Range: [0, 1.0]'),
    ],
    outputs=[
        gr.outputs.Image(type='filepath', label='Output_image'),
        gr.outputs.Textbox(type='text', label='Output_text')
    ],
    title=title,
    description=description,
    examples=examples,
    article=article,
    css=css,
    allow_flagging='never',
).launch(debug=True, enable_queue=True)