OCR-image-to-text

Sleeping

App Files Files Community

Genzo1010 committed on Sep 11, 2024

Commit

0168e15

verified ·

1 Parent(s): dadc526

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -108

app.py CHANGED Viewed

@@ -1,120 +1,57 @@
-import gradio as gr
-import tensorflow as tf
-import keras_ocr
-import requests
-import cv2
-import os
-import csv
 import numpy as np
-import pandas as pd
-import huggingface_hub
-from huggingface_hub import Repository
-from datetime import datetime
-import scipy.ndimage.interpolation as inter
-import easyocr
-import datasets
-from datasets import load_dataset, Image
 from PIL import Image
 from paddleocr import PaddleOCR
-from save_data import flag
-"""
-Paddle OCR
-"""
 def ocr_with_paddle(img):
     finaltext = ''
-    ocr = PaddleOCR(lang='en', use_angle_cls=True)
-    # img_path = 'exp.jpeg'
-    result = ocr.ocr(img)
     for i in range(len(result[0])):
         text = result[0][i][1][0]
-        finaltext += ' '+ text
     return finaltext
-"""
-Keras OCR
-"""
-def ocr_with_keras(img):
-    output_text = ''
-    pipeline=keras_ocr.pipeline.Pipeline()
-    images=[keras_ocr.tools.read(img)]
-    predictions=pipeline.recognize(images)
-    first=predictions[0]
-    for text,box in first:
-        output_text += ' '+ text
-    return output_text
-"""
-easy OCR
-"""
-# gray scale image
-def get_grayscale(image):
-    return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
-# Thresholding or Binarization
-def thresholding(src):
-    return cv2.threshold(src,127,255, cv2.THRESH_TOZERO)[1]
-def ocr_with_easy(img):
-    gray_scale_image=get_grayscale(img)
-    thresholding(gray_scale_image)
-    cv2.imwrite('image.png',gray_scale_image)
-    reader = easyocr.Reader(['th','en'])
-    bounds = reader.readtext('image.png',paragraph="False",detail = 0)
-    bounds = ''.join(bounds)
-    return bounds
-"""
-Generate OCR
-"""
-def generate_ocr(Method,img):
-    text_output = ''
-    if (img).any():
-        add_csv = []
-        image_id = 1
-        print("Method___________________",Method)
-        if Method == 'EasyOCR':
-            text_output = ocr_with_easy(img)
-        if Method == 'KerasOCR':
-            text_output = ocr_with_keras(img)
-        if Method == 'PaddleOCR':
-            text_output = ocr_with_paddle(img)
-        try:
-            flag(Method,text_output,img)
-        except Exception as e:
-            print(e)
-        return text_output
     else:
-        raise gr.Error("Please upload an image!!!!")
-    # except Exception as e:
-    #     print("Error in ocr generation ==>",e)
-    #     text_output = "Something went wrong"
-    # return text_output
-"""
-Create user interface for OCR demo
-"""
-# image = gr.Image(shape=(300, 300))
-image = gr.Image()
-method = gr.Radio(["PaddleOCR","EasyOCR", "KerasOCR"],value="PaddleOCR")
-output = gr.Textbox(label="Output")
-demo = gr.Interface(
-    generate_ocr,
-    [method,image],
-    output,
-    title="Optical Character Recognition",
-    css=".gradio-container {background-color: lightgray} #radio_div {background-color: #FFD8B4; font-size: 40px;}",
-    article = """<p style='text-align: center;'>Feel free to give us your thoughts on this demo and please contact us at
-                    <a href="mailto:[email protected]" target="_blank">[email protected]</a>
-                    <p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com" target="_blank">Pragnakalp Techlabs</a></p>"""
-)
-# demo.launch(enable_queue = False)
-demo.launch(show_error=True)

+from fastapi import FastAPI, File, UploadFile
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.middleware.gzip import GZipMiddleware
 import numpy as np
 from PIL import Image
 from paddleocr import PaddleOCR
+from doctr.io import DocumentFile
+from doctr.models import ocr_predictor
+import io
+app = FastAPI()
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"]
+)
+# Initialize models once at startup
+ocr_model = ocr_predictor(pretrained=True)
+paddle_ocr = PaddleOCR(lang='en', use_angle_cls=True)
+def ocr_with_doctr(file):
+    text_output = ''
+    doc = DocumentFile.from_pdf(file)
+    result = ocr_model(doc)
+    for page in result.pages:
+        for block in page.blocks:
+            for line in block.lines:
+                text_output += " ".join([word.value for word in line.words]) + "\n"
+    return text_output
 def ocr_with_paddle(img):
     finaltext = ''
+    result = paddle_ocr.ocr(img)
     for i in range(len(result[0])):
         text = result[0][i][1][0]
+        finaltext += ' ' + text
     return finaltext
+def generate_text_from_image(img):
+    return ocr_with_paddle(img)
+@app.post("/ocr/")
+async def perform_ocr(file: UploadFile = File(...)):
+    file_bytes = await file.read()
+    if file.filename.endswith('.pdf'):
+        text_output = ocr_with_doctr(io.BytesIO(file_bytes))
     else:
+        img = np.array(Image.open(io.BytesIO(file_bytes)))
+        text_output = generate_text_from_image(img)
+    return {"ocr_text": text_output}
+@app.get("/test/")
+async def test_call():
+    return {"message": "Hi. I'm running"}