"""FastAPI service exposing a single OCR endpoint.

Uploads whose filename ends in ``.pdf`` are processed with docTR; every other
upload is decoded as an image and processed with PaddleOCR.
"""
import functools
import io
import os

import numpy as np
from fastapi import FastAPI, File, HTTPException, UploadFile
from paddleocr import PaddleOCR
from PIL import Image

# docTR chooses its deep-learning backend from these variables at import
# time, so they have to be set *before* the doctr imports below. Setting them
# afterwards (as this file used to) has no effect on backend selection.
os.environ['USE_TORCH'] = 'YES'
os.environ['USE_TF'] = 'NO'

from doctr.io import DocumentFile  # noqa: E402
from doctr.models import ocr_predictor  # noqa: E402

app = FastAPI()

# Load the docTR OCR model once at start-up; it is reused by every request.
ocr_model = ocr_predictor(pretrained=True)

# PIL modes whose ``np.array`` form is handed to PaddleOCR unchanged. Other
# modes (palette "P", bilevel "1", "CMYK", ...) do not yield plain 8-bit pixel
# data, so they are converted to RGB first.
_ARRAY_SAFE_MODES = ("RGB", "RGBA", "L")


@functools.lru_cache(maxsize=1)
def _get_paddle_ocr():
    """Return the shared PaddleOCR engine, creating it on first use."""
    return PaddleOCR(lang='en', use_angle_cls=True)


def ocr_with_doctr(file):
    """Run docTR OCR on a PDF and return the recognised text.

    Args:
        file: The PDF as a path, raw ``bytes``, or a binary file-like object
            (anything with ``read()``, e.g. ``io.BytesIO``).

    Returns:
        The recognised text, one OCR line per text line, each terminated by
        ``"\n"``. Empty string when nothing was recognised.
    """
    # ``DocumentFile.from_pdf`` is documented for paths and bytes; normalise
    # file-like input so callers that pass a buffer keep working.
    if hasattr(file, "read"):
        file = file.read()

    doc = DocumentFile.from_pdf(file)
    result = ocr_model(doc)

    return "".join(
        " ".join(word.value for word in line.words) + "\n"
        for page in result.pages
        for block in page.blocks
        for line in block.lines
    )


def ocr_with_paddle(img):
    """Run PaddleOCR on an image array and return the recognised text.

    Args:
        img: The image as a NumPy array.

    Returns:
        Every recognised text fragment, each preceded by a single space
        (so a non-empty result starts with a space). Empty string when no
        text was detected.
    """
    result = _get_paddle_ocr().ocr(img)

    # PaddleOCR reports "nothing detected" as ``[None]`` (or an empty list),
    # which previously crashed on ``len(result[0])``.
    detections = result[0] if result else None
    if not detections:
        return ''

    # Each detection is ``[box, (text, confidence)]``.
    return ''.join(' ' + detection[1][0] for detection in detections)


def generate_text_from_image(img):
    """Return the OCR text for an image array (currently PaddleOCR only)."""
    return ocr_with_paddle(img)


@app.post("/ocr/")
async def perform_ocr(file: UploadFile = File(...)):
    """OCR an uploaded PDF or image.

    Args:
        file: The uploaded file. A filename ending in ``.pdf`` (any case)
            selects the docTR PDF pipeline; anything else is decoded as an
            image and sent to PaddleOCR.

    Returns:
        ``{"ocr_text": <recognised text>}``.

    Raises:
        HTTPException: 400 when a non-PDF upload cannot be decoded as an
            image.
    """
    # NOTE(review): the OCR calls below are CPU-bound and run on the event
    # loop; consider a worker thread/process if concurrent requests matter.
    file_bytes = await file.read()

    # ``filename`` can be None for uploads sent without one.
    filename = file.filename or ""

    if filename.lower().endswith('.pdf'):
        text_output = ocr_with_doctr(file_bytes)
    else:
        try:
            with Image.open(io.BytesIO(file_bytes)) as image:
                if image.mode not in _ARRAY_SAFE_MODES:
                    image = image.convert("RGB")
                img = np.array(image)
        except OSError as exc:
            # Pillow's UnidentifiedImageError is an OSError subclass, so this
            # covers both unknown formats and truncated/corrupt files.
            raise HTTPException(
                status_code=400,
                detail="Uploaded file is not a readable image or PDF.",
            ) from exc
        text_output = generate_text_from_image(img)

    return {"ocr_text": text_output}