# OCR-image-to-text
#
# Sleeping
#
# File size: 2,664 Bytes

import uvicorn
from fastapi import FastAPI, UploadFile, File, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from typing import Optional
import numpy as np
from PIL import Image
from paddleocr import PaddleOCR
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
import io
import logging

class OCRAPIApp:
    """FastAPI application exposing a single ``POST /ocr`` endpoint.

    Uploaded files whose name ends in ``.pdf`` are processed with the docTR
    predictor; every other upload is decoded as an image and processed with
    PaddleOCR.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self):
        # The interactive API docs are served at the root path.
        self.app = FastAPI(
            docs_url="/",
            title="OCR API",
            version="1.0",
        )
        # Load both OCR engines once, up front, so requests reuse them.
        self.paddle_ocr = PaddleOCR(lang='en', use_angle_cls=True)
        self.doctr_model = ocr_predictor(pretrained=True)
        # Register routes last so a route never exists without its models.
        self.setup_routes()

    def ocr_with_paddle(self, img):
        """Run PaddleOCR on ``img`` and return the recognized text.

        Args:
            img: Image in a form accepted by ``PaddleOCR.ocr`` (the endpoint
                passes a NumPy array).

        Returns:
            The recognized text fragments joined by single spaces, or an
            empty string when nothing was detected.

        Raises:
            HTTPException: 500 if PaddleOCR fails for any reason.
        """
        try:
            logging.info("Processing image with PaddleOCR...")
            result = self.paddle_ocr.ocr(img)
            # The first entry holds the detections for the single image
            # passed in; it may be None/empty when no text is found.
            detections = result[0] if result else None
            if not detections:
                return ""
            # Each detection is (box, (text, confidence)); keep the text.
            return ' '.join(line[1][0] for line in detections)
        except Exception as e:
            logging.exception("Error with PaddleOCR: %s", e)
            raise HTTPException(
                status_code=500, detail="Error processing image"
            ) from e

    def ocr_with_doctr(self, file):
        """Run the docTR predictor on a PDF and return the recognized text.

        Args:
            file: PDF source handed straight to ``DocumentFile.from_pdf``
                (the endpoint passes an ``io.BytesIO``).

        Returns:
            One line of output per recognized text line, each terminated by
            a newline, with words separated by single spaces.

        Raises:
            HTTPException: 500 if reading or recognizing the PDF fails.
        """
        try:
            logging.info("Processing PDF with Doctr...")
            doc = DocumentFile.from_pdf(file)
            result = self.doctr_model(doc)
            return ''.join(
                ' '.join(word.value for word in line.words) + "\n"
                for page in result.pages
                for block in page.blocks
                for line in block.lines
            )
        except Exception as e:
            logging.exception("Error with Doctr: %s", e)
            raise HTTPException(
                status_code=500, detail="Error processing PDF"
            ) from e

    async def ocr_endpoint(self, file: UploadFile = File(...)):
        """Handle ``POST /ocr``: extract text from an uploaded PDF or image.

        Args:
            file: The uploaded file. A name ending in ``.pdf`` (any letter
                case) selects docTR; anything else is treated as an image.

        Returns:
            A dict ``{"ocr_text": <recognized text>}``.

        Raises:
            HTTPException: 500 on any processing failure. Errors raised by
                the OCR helpers are propagated with their own detail.
        """
        try:
            file_bytes = await file.read()
            # The filename can be missing on some uploads; treat as image.
            filename = file.filename or ""
            if filename.lower().endswith(".pdf"):
                text_output = self.ocr_with_doctr(io.BytesIO(file_bytes))
            else:
                with Image.open(io.BytesIO(file_bytes)) as image:
                    # Normalize palette / 1-bit / RGBA / grayscale inputs to
                    # a plain 3-channel array before handing it to the OCR.
                    img = np.array(image.convert("RGB"))
                text_output = self.ocr_with_paddle(img)
            return {"ocr_text": text_output}
        except HTTPException:
            # Already a well-formed HTTP error from a helper; don't mask it.
            raise
        except Exception as e:
            logging.exception("Error processing file: %s", e)
            raise HTTPException(
                status_code=500, detail="Error processing file"
            ) from e

    def setup_routes(self):
        """Register the OCR handler as ``POST /ocr`` on the FastAPI app."""
        self.app.post("/ocr")(self.ocr_endpoint)

# Initialize the app
app = OCRAPIApp().app

# Add CORS middleware for cross-origin requests.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive; consider restricting allow_origins before exposing this service.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

if __name__ == "__main__":
    # This branch previously held a shell command, which is a syntax error
    # inside a Python module; start the ASGI server programmatically instead.
    # NOTE(review): uses uvicorn's default host/port; pass host=/port= here
    # if the deployment needs something else.
    uvicorn.run(app)