Spaces:
Running
Running
# -*- encoding: utf-8 -*- | |
# @Author: OpenOCR | |
# @Contact: [email protected] | |
import os | |
import gradio as gr # gradio==4.20.0 | |
os.environ['FLAGS_allocator_strategy'] = 'auto_growth' | |
import cv2 | |
import numpy as np | |
import json | |
import time | |
from PIL import Image | |
from tools.infer_e2e import OpenOCR, check_and_download_font, draw_ocr_box_txt | |
# Confidence threshold handed to the OCR pipeline at construction time.
drop_score = 0.01
text_sys = OpenOCR(drop_score=drop_score)

# Warm-up: push one random 640x640x3 uint8 image through the pipeline five
# times (presumably so the first real request does not pay one-off
# initialization costs -- confirm). Only the last result is kept, in `res`.
img = np.random.uniform(0, 255, (640, 640, 3)).astype(np.uint8)
for i in range(5):
    res = text_sys(img_numpy=img)

# Font used when rendering recognized text onto the visualization image.
font_path = './simfang.ttf'
check_and_download_font(font_path)
def main(input_image,
         rec_drop_score=0.01,
         mask_thresh=0.3,
         box_thresh=0.6,
         unclip_ratio=1.5,
         det_score_mode='slow'):
    """Run end-to-end OCR on one image and build the demo outputs.

    Args:
        input_image: Image array delivered by the Gradio image input. Its
            last axis is reversed before inference (presumably RGB -> BGR;
            confirm against the component's image mode).
        rec_drop_score: Forwarded to ``draw_ocr_box_txt`` as ``drop_score``.
            It only influences the rendered overlay; the JSON result is
            serialized before this threshold is applied.
        mask_thresh: Forwarded to the OCR system as ``thresh`` and reused
            here to binarize the returned mask.
        box_thresh: Forwarded to the OCR system as ``box_thresh``.
        unclip_ratio: Forwarded to the OCR system as ``unclip_ratio``.
        det_score_mode: Forwarded to the OCR system as ``score_mode``.

    Returns:
        A 4-tuple ``(json_text, elapsed_seconds, overlay, mask_image)``:
        the JSON dump of the first result entry, the wall time of the OCR
        call in seconds, the image returned by ``draw_ocr_box_txt`` and the
        thresholded mask as a ``uint8`` array holding 0 or 255.

    Raises:
        gr.Error: If no image was supplied (Gradio passes ``None`` when the
            image component is empty).
    """
    # Without this guard an empty image input fails on the slice below with
    # an opaque TypeError; gr.Error surfaces a readable message in the UI.
    if input_image is None:
        raise gr.Error('Please upload an image or select an example first.')

    # Reverse the channel axis for the OCR pipeline.
    img = input_image[:, :, ::-1]

    # perf_counter is monotonic, so the latency cannot be skewed by
    # system clock adjustments.
    starttime = time.perf_counter()
    results, time_dict, mask = text_sys(img_numpy=img,
                                        return_mask=True,
                                        thresh=mask_thresh,
                                        box_thresh=box_thresh,
                                        unclip_ratio=unclip_ratio,
                                        score_mode=det_score_mode)
    elapse = time.perf_counter() - starttime

    # A single image was submitted, so only the first result entry is used.
    page = results[0]
    save_pred = json.dumps(page, ensure_ascii=False)

    # Undo the channel reversal to get the PIL image used for drawing.
    image = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    boxes = [item['points'] for item in page]
    txts = [item['transcription'] for item in page]
    scores = [item['score'] for item in page]
    draw_img = draw_ocr_box_txt(
        image,
        boxes,
        txts,
        scores,
        drop_score=rec_drop_score,
        font_path=font_path,
    )

    # Take the [0, 0] plane of the mask and binarize it into a 0/255 image.
    mask = mask[0, 0, :, :] > mask_thresh
    return save_pred, elapse, draw_img, mask.astype('uint8') * 255
def get_all_file_names_including_subdirs(dir_path):
    """Return the base names of every file under *dir_path*, recursively.

    Args:
        dir_path: Directory to walk with ``os.walk``.

    Returns:
        A list of file names without any directory component, in
        ``os.walk`` order. Names that occur in several sub-directories
        appear once per occurrence. A missing or empty directory yields
        an empty list.
    """
    # os.walk already yields bare file names, so there is no need to join
    # them onto the root and strip the directory part again.
    return [
        file_name
        for _root, _dirs, files in os.walk(dir_path)
        for file_name in files
    ]
def list_image_paths(directory):
    """Return the sorted paths of all image files found under *directory*.

    A file counts as an image when its lower-cased name ends with one of
    ``.png``, ``.jpg``, ``.jpeg``, ``.gif``, ``.bmp`` or ``.tiff``.

    Args:
        directory: Root directory to search recursively with ``os.walk``.

    Returns:
        A lexicographically sorted list of paths, each prefixed with
        *directory*. A missing or empty directory yields an empty list.
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp', '.tiff')
    # os.walk roots already start with `directory`, so joining root and
    # name gives the final path directly; no relpath round-trip (which
    # also depends on the current working directory) is needed.
    return sorted(
        os.path.join(root, file_name)
        for root, _dirs, files in os.walk(directory)
        for file_name in files
        if file_name.lower().endswith(image_extensions)
    )
def find_file_in_current_dir_and_subdirs(file_name):
    """Locate *file_name* below the current working directory.

    Walks ``'.'`` with ``os.walk`` and returns the path (prefixed with
    ``'.'``) of the first directory containing a file with that exact name,
    or ``None`` when no directory contains it.
    """
    candidates = (
        os.path.join(folder, file_name)
        for folder, _subdirs, names in os.walk('.')
        if file_name in names
    )
    # The generator is lazy, so the walk stops at the first hit.
    return next(candidates, None)
# Image files under ./OCR_e2e_img, offered as clickable examples in the UI.
e2e_img_example = list_image_paths('./OCR_e2e_img')

if __name__ == '__main__':
    # NOTE(review): the nesting of the layout contexts below was
    # reconstructed from a source whose indentation had been lost --
    # confirm the Row/Column structure against the running demo.
    css = '.image-container img { width: 100%; max-height: 320px;}'

    with gr.Blocks(css=css) as demo:
        gr.HTML("""
<h1 style='text-align: center;'><a href="https://github.com/Topdu/OpenOCR">OpenOCR</a></h1>
<p style='text-align: center;'>A general OCR system with accuracy and efficiency (created by <a href="https://github.com/Topdu/OpenOCR">OCR Team</a>, <a href="https://fvl.fudan.edu.cn">FVL Lab</a>)</p>""")

        with gr.Row():
            # Input image, examples, run button and tuning controls.
            with gr.Column(scale=1):
                input_image = gr.Image(
                    label='Input image',
                    elem_classes=['image-container'],
                )
                gr.Examples(
                    examples=e2e_img_example,
                    inputs=input_image,
                    label='Examples',
                )
                run_button = gr.Button('Run')

                with gr.Row():
                    with gr.Column():
                        rec_drop_score_slider = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.01,
                            step=0.01,
                            label='Recognition Drop Score',
                            info='Recognition confidence threshold, default value is 0.01. Recognition results and corresponding text boxes lower than this threshold are discarded.',
                        )
                        mask_thresh_slider = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.3,
                            step=0.01,
                            label='Mask Threshold',
                            info='Mask threshold for binarizing masks, defaults to 0.3, turn it down if there is text truncation.',
                        )
                    with gr.Column():
                        box_thresh_slider = gr.Slider(
                            minimum=0.0,
                            maximum=1.0,
                            value=0.6,
                            step=0.01,
                            label='Box Threshold',
                            info='Text Box Confidence Threshold, default value is 0.6, turn it down if there is text being missed.',
                        )
                        unclip_ratio_slider = gr.Slider(
                            minimum=1.5,
                            maximum=2.0,
                            value=1.5,
                            step=0.05,
                            label='Unclip Ratio',
                            info='Expansion factor for parsing text boxes, default value is 1.5. The larger the value, the larger the text box.',
                        )
                det_score_mode_dropdown = gr.Dropdown(
                    choices=['slow', 'fast'],
                    value='slow',
                    label='Det Score Mode',
                    info='The confidence calculation mode of the text box, the default is slow. Slow mode is slower but more accurate. Fast mode is faster but less accurate.',
                )

            # Mask, annotated image, JSON result and latency.
            with gr.Column(scale=1):
                img_mask = gr.Image(
                    label='mask',
                    interactive=False,
                    elem_classes=['image-container'],
                )
                img_output = gr.Image(
                    label=' ',
                    interactive=False,
                    elem_classes=['image-container'],
                )
                result_box = gr.Textbox(label='Result')
                latency_box = gr.Textbox(label='Latency')

        # The input order follows main()'s parameters and the output order
        # follows its return tuple (json text, latency, overlay, mask).
        run_button.click(
            fn=main,
            inputs=[
                input_image,
                rec_drop_score_slider,
                mask_thresh_slider,
                box_thresh_slider,
                unclip_ratio_slider,
                det_score_mode_dropdown,
            ],
            outputs=[
                result_box,
                latency_box,
                img_output,
                img_mask,
            ],
        )

    demo.launch(share=True)