# Hugging Face Spaces page banner (not part of the program) -- status: Running
import numpy as np | |
import gradio as gr | |
from transformers import AutoFeatureExtractor, AutoTokenizer, VisionEncoderDecoderModel | |
import re | |
import jaconv | |
# Load the image feature extractor, the tokenizer and the vision
# encoder-decoder model from one local checkpoint directory. This runs once
# at import time so every request reuses the same objects.
model_path = "model/"
feature_extractor = AutoFeatureExtractor.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = VisionEncoderDecoderModel.from_pretrained(model_path)
# Sample images offered in the demo UI: examples/01.png .. examples/07.png.
examples = [f"examples/{index:02d}.png" for index in range(1, 8)]
def post_process(text):
    """Normalize a decoded OCR string.

    Steps, in order:
      1. remove every whitespace character;
      2. replace the single-character ellipsis with three ASCII periods;
      3. rewrite any run of two or more middle dots / periods as the same
         number of ASCII periods;
      4. pass the result through ``jaconv.h2z`` with ASCII and digit
         conversion enabled (half-width to full-width).

    Args:
        text: The raw string produced by the tokenizer.

    Returns:
        The normalized string.
    """
    # str.split() with no argument splits on any whitespace, so joining the
    # pieces back together strips all of it (including inner whitespace).
    cleaned = "".join(text.split())
    cleaned = cleaned.replace("…", "...")
    # Keep the run length intact; only the character used is unified.
    cleaned = re.sub("[・.]{2,}", lambda run: "." * len(run.group()), cleaned)
    return jaconv.h2z(cleaned, ascii=True, digit=True)
def infer(image):
    """Run OCR on one image and return the recognized, normalized text.

    Args:
        image: A PIL image (the interface requests ``type="pil"``), or
            ``None`` when the callback fires without an uploaded image.

    Returns:
        The post-processed text, or an empty string when ``image`` is
        ``None``.
    """
    # Without this guard a missing image fails on ``None.convert`` with an
    # AttributeError; an empty result is the sensible answer for no input.
    if image is None:
        return ""

    # Collapse to grayscale first, then expand back to three channels.
    normalized = image.convert("L").convert("RGB")
    pixel_values = feature_extractor(normalized, return_tensors="pt").pixel_values
    # generate() returns a batch; there is a single image, so take item 0.
    output_ids = model.generate(pixel_values)[0]
    decoded = tokenizer.decode(output_ids, skip_special_tokens=True)
    return post_process(decoded)
# Assemble the Gradio demo: one PIL image in, recognized text out.
iface = gr.Interface(
    # Callback and I/O components.
    fn=infer,
    inputs=[gr.inputs.Image(label="Input", type="pil")],
    outputs="text",
    # Bundled sample images; their outputs are cached by Gradio.
    examples=examples,
    cache_examples=True,
    # Page text.
    title="Optical Character Recognition for Japanese Text",
    description="A simple interface for OCR from Japanese manga",
    article='Author: <a href="https://huggingface.co/vumichien">Vu Minh Chien</a>. ',
    # Look and behavior.
    layout="horizontal",
    theme="huggingface",
    allow_flagging="never",
)

# Start the web server with request queueing turned on.
iface.launch(enable_queue=True)