|
import functools
import io
import os

from fastapi import FastAPI, File, UploadFile
import numpy as np
from PIL import Image
from paddleocr import PaddleOCR
from doctr.io import DocumentFile
from doctr.models import ocr_predictor
|
|
|
# FastAPI application object; the /ocr/ route in this file is registered on it.
app = FastAPI()

# Presumably selects the PyTorch backend (and disables TensorFlow) for docTR.
# NOTE(review): these flags are assigned after the `doctr` imports at the top
# of the file; if docTR reads them at import time they have no effect here.
# Confirm, and move the assignments above those imports if so.
os.environ.update({'USE_TORCH': 'YES', 'USE_TF': 'NO'})

# Build the pretrained docTR predictor once at import time; ocr_with_doctr
# reuses this single instance on every call.
ocr_model = ocr_predictor(pretrained=True)
|
|
|
def ocr_with_doctr(file):
    """Run the module-level docTR predictor on a PDF and return its text.

    Args:
        file: PDF input, forwarded unchanged to ``DocumentFile.from_pdf``.

    Returns:
        One row per recognised line: the line's words joined by single
        spaces and terminated by a newline character. An empty string is
        returned when the prediction contains no lines.
    """
    pages = DocumentFile.from_pdf(file)
    prediction = ocr_model(pages)

    # Walk pages -> blocks -> lines, collecting one rendered row per line.
    rows = []
    for page in prediction.pages:
        for block in page.blocks:
            rows.extend(
                " ".join(word.value for word in line.words) + "\n"
                for line in block.lines
            )
    return ''.join(rows)
|
|
|
@functools.lru_cache(maxsize=None)
def _get_paddle_engine():
    """Build the English PaddleOCR engine on first use and reuse it afterwards.

    Constructing ``PaddleOCR`` loads its models, so doing it once instead of
    on every call avoids repeating that work per request.
    """
    return PaddleOCR(lang='en', use_angle_cls=True)


def ocr_with_paddle(img):
    """Recognise text in an image with PaddleOCR.

    Args:
        img: Image input passed straight to ``PaddleOCR.ocr`` (the endpoint in
            this file supplies a NumPy array).

    Returns:
        Every recognised text fragment prefixed by a single space and
        concatenated in the order PaddleOCR returns them, or ``''`` when
        nothing is detected.
    """
    result = _get_paddle_engine().ocr(img)

    # The first entry holds the detections for this image; it can be None or
    # empty when no text is found, so guard before iterating over it.
    if not result or not result[0]:
        return ''

    # For each detection, element [1][0] is the recognised text.
    return ''.join(' ' + detection[1][0] for detection in result[0])
|
|
|
|
|
def generate_text_from_image(img):
    """Return the text recognised in ``img``.

    Thin wrapper that delegates to ``ocr_with_paddle`` and returns its result
    unchanged.
    """
    return ocr_with_paddle(img)
|
|
|
@app.post("/ocr/")
async def perform_ocr(file: UploadFile = File(...)):
    """Extract text from an uploaded PDF or image.

    Uploads whose name ends in ``.pdf`` (case-insensitive) are processed by
    ``ocr_with_doctr``; any other upload is decoded with Pillow and passed to
    ``generate_text_from_image``.

    Args:
        file: The uploaded file from the request.

    Returns:
        A dict of the form ``{"ocr_text": <recognised text>}``.
    """
    file_bytes = await file.read()

    # The client-supplied name may be missing, and its extension may be
    # upper-case ("SCAN.PDF"), so normalise it before testing the suffix.
    filename = (file.filename or '').lower()

    if filename.endswith('.pdf'):
        text_output = ocr_with_doctr(io.BytesIO(file_bytes))
    else:
        # The context manager releases Pillow's handle once the pixel data
        # has been copied into the array.
        with Image.open(io.BytesIO(file_bytes)) as image:
            # Palette, bilevel, CMYK, ... images do not decode to a plain
            # grey/colour pixel array, so normalise those to RGB. Modes that
            # already map onto pixel channels are passed through untouched.
            if image.mode in ('L', 'RGB', 'RGBA'):
                pixels = image
            else:
                pixels = image.convert('RGB')
            img = np.array(pixels)
        text_output = generate_text_from_image(img)

    return {"ocr_text": text_output}
|
|