# Source: Hugging Face Space "RapidAI/RapidOCR" (status: Running).
# File size: 4,805 bytes.
# Revisions that touched this file: 5d6a0bb, 829289b, 00e3b6c, 9792e33, 461c5b6.

# -*- encoding: utf-8 -*-
import os
import subprocess
import sys

# Install the runtime dependencies before the third-party imports below are
# attempted.  pip is invoked as a module of the running interpreter (so the
# packages land in the environment that actually executes this script) and
# with an argument list rather than a shell string.  check=False keeps the
# step best-effort: a failed install does not abort start-up here.
subprocess.run(
    [sys.executable, '-m', 'pip', 'install', '-r', 'requirements.txt'],
    check=False,
)

import math
import random
import time
from pathlib import Path

import cv2
import gradio as gr
import numpy as np
from PIL import Image, ImageDraw, ImageFont
from rapidocr_onnxruntime import RapidOCR


def _char_advance(font, char):
    """Return the vertical step used after drawing *char* with *font*."""
    if hasattr(font, 'getbbox'):
        # ``getsize`` was removed in Pillow 10; the bottom edge of the
        # bounding box is used as the per-character vertical advance.
        return font.getbbox(char)[3]
    # Older Pillow releases without ``getbbox``.
    return font.getsize(char)[1]


def draw_ocr_box_txt(image, boxes, txts, font_path,
                     scores=None, text_score=0.5):
    """Render OCR results as a side-by-side visualisation.

    The left half is the input image blended 50/50 with colour-filled text
    boxes; the right half is a white canvas with the box outlines and the
    recognised text drawn inside them.

    Args:
        image: PIL image the boxes refer to.
        boxes: Iterable of quadrilaterals, each a sequence of four (x, y)
            points.  The edge 0-1 is treated as the box width and the edge
            0-3 as its height (presumably corners are ordered top-left,
            top-right, bottom-right, bottom-left -- confirm with the
            detector's output).
        txts: Recognised strings, parallel to ``boxes``.
        font_path: Path to a TrueType font used to render the text.
        scores: Optional confidences parallel to ``boxes``; when given,
            entries below ``text_score`` are not drawn.
        text_score: Minimum score for an entry to be drawn.

    Returns:
        ``numpy.ndarray`` of the combined image, twice as wide as the input.
    """
    # The colours used below are RGB triples, so normalise other modes
    # (grayscale, palette, RGBA) before drawing on a copy of the image.
    if image.mode != 'RGB':
        image = image.convert('RGB')

    h, w = image.height, image.width
    img_left = image.copy()
    img_right = Image.new('RGB', (w, h), (255, 255, 255))

    # A private generator seeded with 0 yields the same colour sequence on
    # every call without reseeding the process-wide ``random`` state.
    rng = random.Random(0)
    draw_left = ImageDraw.Draw(img_left)
    draw_right = ImageDraw.Draw(img_right)

    # Fonts are cached per size so each size is loaded from disk only once.
    font_cache = {}

    def get_font(size):
        if size not in font_cache:
            font_cache[size] = ImageFont.truetype(font_path, size,
                                                  encoding="utf-8")
        return font_cache[size]

    for idx, (box, txt) in enumerate(zip(boxes, txts)):
        if scores is not None and float(scores[idx]) < text_score:
            continue

        color = (rng.randint(0, 255),
                 rng.randint(0, 255),
                 rng.randint(0, 255))

        box = [tuple(v) for v in box]
        draw_left.polygon(box, fill=color)
        draw_right.polygon(box, outline=color)

        box_height = math.hypot(box[0][0] - box[3][0],
                                box[0][1] - box[3][1])
        box_width = math.hypot(box[0][0] - box[1][0],
                               box[0][1] - box[1][1])

        if box_height > 2 * box_width:
            # Tall, narrow box: draw the text one character per row,
            # stepping down by each character's height.
            font = get_font(max(int(box_width * 0.9), 10))
            cur_y = box[0][1]
            for c in txt:
                draw_right.text((box[0][0] + 3, cur_y), c,
                                fill=(0, 0, 0), font=font)
                cur_y += _char_advance(font, c)
        else:
            # Otherwise draw the whole string once, anchored at point 0.
            font = get_font(max(int(box_height * 0.8), 10))
            draw_right.text((box[0][0], box[0][1]), txt,
                            fill=(0, 0, 0), font=font)

    img_left = Image.blend(image, img_left, 0.5)
    img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
    img_show.paste(img_left, (0, 0, w, h))
    img_show.paste(img_right, (w, 0, w * 2, h))
    return np.array(img_show)


def visualize(image_path, boxes, txts, scores,
              font_path="./FZYTK.TTF", text_score=0.5):
    """Draw OCR results for an image and save the rendering to disk.

    Args:
        image_path: Path of the image the OCR results belong to.
        boxes: Detected text boxes, passed to ``draw_ocr_box_txt``.
        txts: Recognised strings, parallel to ``boxes``.
        scores: Recognition scores, parallel to ``boxes``.
        font_path: TrueType font used to render the recognised text.
        text_score: Minimum score for a result to be drawn.  Defaults to
            0.5, the value that was previously hard-coded.

    Returns:
        Path (``str``) of the saved image under ``./inference_results/``,
        named ``<timestamp>_<original file name>``.

    Raises:
        OSError: If OpenCV reports that the image could not be written.
    """
    # Close the source file as soon as the rendering has been produced.
    with Image.open(image_path) as image:
        draw_img = draw_ocr_box_txt(image, boxes,
                                    txts, font_path,
                                    scores,
                                    text_score=text_score)

    draw_img_save = Path("./inference_results/")
    draw_img_save.mkdir(parents=True, exist_ok=True)

    time_stamp = time.strftime('%Y-%m-%d-%H-%M-%S', time.localtime())
    image_save = str(draw_img_save / f'{time_stamp}_{Path(image_path).name}')
    # The rendering is RGB; reverse the channel axis for OpenCV (BGR).
    if not cv2.imwrite(image_save, draw_img[:, :, ::-1]):
        raise OSError(f'Failed to write visualisation to {image_save}')
    return image_save


def inference(img, box_thresh, unclip_ratio, text_score):
    """Run OCR on an uploaded file and build the demo outputs.

    Args:
        img: File-like object exposing the upload's path as ``.name``.
        box_thresh: Forwarded to the OCR engine as ``box_thresh``.
        unclip_ratio: Forwarded to the OCR engine as ``unclip_ratio``.
        text_score: Forwarded to the OCR engine as ``text_score`` and used
            as the drawing threshold, so the rendered image and the text
            list are filtered with the same value.

    Returns:
        ``(image_path, lines)``: the path of the rendered visualisation and
        a list of ``"<text> <score>"`` strings.  When the engine returns no
        result, the input image path and an empty list are returned.
    """
    img_path = img.name
    image = cv2.imread(img_path)
    ocr_result, _ = rapid_ocr(image, box_thresh=box_thresh,
                              unclip_ratio=unclip_ratio,
                              text_score=text_score)
    # An empty/None result cannot be unpacked with zip(*...); show the
    # untouched input instead of failing.
    if not ocr_result:
        return img_path, []

    dt_boxes, rec_res, scores = zip(*ocr_result)
    img_save_path = visualize(img_path, dt_boxes, rec_res, scores,
                              text_score=text_score)
    output_text = [f'{one_rec} {float(score):.4f}'
                   for one_rec, score in zip(rec_res, scores)]
    return img_save_path, output_text


# Static texts and styling shown on the demo page.
title = 'RapidOCR Demo (捷智OCR)'
description = 'Gradio demo for RapidOCR. Github Repo: https://github.com/RapidAI/RapidOCR'
article = "<p style='text-align: center'> Completely open source, free and support offline deployment of multi-platform and multi-language OCR SDK <a href='https://github.com/RapidAI/RapidOCR'>Github Repo</a></p>"
css = ".output_image, .input_image {height: 40rem !important; width: 100% !important;}"

# Single OCR engine instance used by ``inference`` for every request.
rapid_ocr = RapidOCR()

# Input widgets, in the order of ``inference``'s parameters:
# the uploaded image followed by the three tuning sliders.
demo_inputs = [
    gr.inputs.Image(type='file', label='Input'),
    gr.Slider(label='box_thresh',
              minimum=0, maximum=1.0, value=0.5, step=0.1),
    gr.Slider(label='unclip_ratio',
              minimum=1.5, maximum=2.0, value=1.6, step=0.1),
    gr.Slider(label='text_score',
              minimum=0, maximum=1.0, value=0.5, step=0.1),
]

# Output widgets, in the order of ``inference``'s return values.
demo_outputs = [
    gr.outputs.Image(type='file', label='Output_image'),
    gr.outputs.Textbox(type='text', label='Output_text'),
]

demo = gr.Interface(
    inference,
    inputs=demo_inputs,
    outputs=demo_outputs,
    title=title,
    description=description,
    article=article,
    css=css,
    allow_flagging='never',
)
demo.launch(debug=True, enable_queue=True)