"""Gradio demo that runs Tesseract OCR (English and Shan) on an uploaded image."""

from typing import List

import gradio as gr
import pytesseract
from PIL import Image

# Single source of truth for the traineddata location; both the OCR call and
# the language discovery below must point Tesseract at the same directory.
TESSDATA_DIR_CONFIG = '--tessdata-dir ./tessdata'


def tesseract_ocr(filepath: str, languages: List[str]) -> str:
    """Run Tesseract on the image at *filepath* and return the recognized text.

    Args:
        filepath: Path of the image file to read.
        languages: Tesseract language codes; they are joined with ``+`` to
            form the ``lang`` argument. When empty, no ``lang`` argument is
            passed and pytesseract applies its own default.

    Returns:
        The text produced by ``pytesseract.image_to_string``.
    """
    oem_psm_config = f'--oem 3 --psm 11 {TESSDATA_DIR_CONFIG}'
    # An empty selection would otherwise produce lang='' (an empty ``-l``
    # value); passing None omits the option instead.
    lang = '+'.join(languages) if languages else None
    # Use a context manager so the underlying file handle is released as soon
    # as OCR finishes instead of lingering until garbage collection.
    with Image.open(filepath) as image:
        return pytesseract.image_to_string(
            image=image,
            lang=lang,
            config=oem_psm_config,
        )


title = "Shan Tesseract OCR"
description = (
    "Gradio demo for Tesseract-OCR Shan. "
    "Tesseract is an open source text recognition (OCR) Engine."
)
# The original literal spanned several physical lines inside plain double
# quotes, which is a syntax error; the same text is written with explicit
# newline escapes.
# NOTE(review): this looks like link markup that lost its tags/URLs somewhere
# upstream - confirm the intended HTML and restore it if so.
article = "\n\nTesseract documentation | Github Repo\n\n"

examples = [
    ["examples/example2.png", ["eng", "shn"]],
    ["examples/example3.png", ["eng", "shn"]],
    ["examples/example4.png", ["eng", "shn"]],
    ["examples/example1.png", ["eng", "shn"]],
]

# Offer exactly the languages Tesseract reports for the configured tessdata dir.
language_choices = pytesseract.get_languages(config=TESSDATA_DIR_CONFIG)

demo = gr.Interface(
    fn=tesseract_ocr,
    inputs=[
        gr.Image(type="filepath", label="Input"),
        gr.CheckboxGroup(
            language_choices,
            type="value",
            value=['eng'],
            label='language',
        ),
    ],
    outputs='text',
    title=title,
    description=description,
    article=article,
    examples=examples,
)

if __name__ == '__main__':
    demo.launch()
    print("Finished running")