demo-gpu

Sleeping

App Files Files Community

vteam27 committed on Apr 18, 2024

Commit

884f6b2

1 Parent(s): 45a9477

Added searchable PDF for English OCR

Browse files

Files changed (1) hide show

app.py +18 -4

app.py CHANGED Viewed

@@ -7,6 +7,8 @@ from doctr.io import DocumentFile
 from doctr.models import ocr_predictor
 import gradio as gr
 from PIL import Image
 from happytransformer import HappyTextToText, TTSettings
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM,logging
 from transformers.integrations import deepspeed
@@ -74,13 +76,13 @@ def greet(img, apply_grammar_correction, apply_spell_check,lang_of_input):
         res = pt.image_to_string(img,lang='hin')
         _output_name = "RESULT_OCR.txt"
         open(_output_name, 'w').write(res)
-        return res, _output_name
     if (lang_of_input=="Punjabi"):
         res = pt.image_to_string(img,lang='pan')
         _output_name = "RESULT_OCR.txt"
         open(_output_name, 'w').write(res)
-        return res, _output_name
     img.save("out.jpg")
@@ -106,7 +108,19 @@ def greet(img, apply_grammar_correction, apply_spell_check,lang_of_input):
     _output_name = "RESULT_OCR.txt"
     open(_output_name, 'w').write(res)
-    return res, _output_name
 # Gradio Interface for OCR
 demo_ocr = gr.Interface(
@@ -117,7 +131,7 @@ demo_ocr = gr.Interface(
         gr.Checkbox(label="Apply Spell Check"),
         gr.Dropdown(["English","Hindi","Punjabi"],label="Select Language")
     ],
-    outputs=["text", "file"],
     title="DocTR OCR with Grammar and Spell Check",
     description="Upload an image to get the OCR results. Optionally, apply grammar and spell check.",
     examples=[["Examples/Book.png"], ["Examples/News.png"], ["Examples/Manuscript.jpg"], ["Examples/Files.jpg"],["Examples/Hindi.jpg"],["Examples/Hindi-manu.jpg"],["Examples/Punjabi_machine.png"]]

 from doctr.models import ocr_predictor
 import gradio as gr
 from PIL import Image
+import base64
+from utils import HocrParser
 from happytransformer import HappyTextToText, TTSettings
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM,logging
 from transformers.integrations import deepspeed
         res = pt.image_to_string(img,lang='hin')
         _output_name = "RESULT_OCR.txt"
         open(_output_name, 'w').write(res)
+        return res, _output_name, None
     if (lang_of_input=="Punjabi"):
         res = pt.image_to_string(img,lang='pan')
         _output_name = "RESULT_OCR.txt"
         open(_output_name, 'w').write(res)
+        return res, _output_name, None
     img.save("out.jpg")
     _output_name = "RESULT_OCR.txt"
     open(_output_name, 'w').write(res)
+    # Convert OCR output to searchable PDF
+    _output_name_pdf="RESULT_OCR.pdf"
+    xml_outputs = output.export_as_xml()
+    parser = HocrParser()
+    base64_encoded_pdfs = list()
+    for i, (xml, img) in enumerate(zip(xml_outputs, doc)):
+      xml_element_tree = xml[1]
+      parser.export_pdfa(_output_name_pdf,
+            hocr=xml_element_tree, image=img)
+      with open(_output_name_pdf, 'rb') as f:
+            base64_encoded_pdfs.append(base64.b64encode(f.read()))
+    return res, _output_name, _output_name_pdf
 # Gradio Interface for OCR
 demo_ocr = gr.Interface(
         gr.Checkbox(label="Apply Spell Check"),
         gr.Dropdown(["English","Hindi","Punjabi"],label="Select Language")
     ],
+    outputs=["text", "file", "file"],
     title="DocTR OCR with Grammar and Spell Check",
     description="Upload an image to get the OCR results. Optionally, apply grammar and spell check.",
     examples=[["Examples/Book.png"], ["Examples/News.png"], ["Examples/Manuscript.jpg"], ["Examples/Files.jpg"],["Examples/Hindi.jpg"],["Examples/Hindi-manu.jpg"],["Examples/Punjabi_machine.png"]]