File size: 1,526 Bytes
2748615
 
 
 
 
 
 
ae73099
6a63b3c
2748615
6a63b3c
2748615
4cb1fb3
 
0d43dab
2748615
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import io
import os

from fastapi import FastAPI, File, HTTPException, UploadFile
import numpy as np
from PIL import Image
from paddleocr import PaddleOCR
from doctr.io import DocumentFile
from doctr.models import ocr_predictor

# ASGI application object; the route decorators in this file register on it.
app = FastAPI()

# Load the doctr OCR model once, at import time, so that ocr_with_doctr can
# reuse the same predictor on every call instead of rebuilding it.
# NOTE(review): the two environment variables below are assigned after
# `doctr` has already been imported at the top of this file. If doctr picks
# its backend while it is being imported, these assignments come too late to
# have any effect -- confirm against the doctr docs and, if so, set them
# before the doctr imports.
os.environ['USE_TORCH'] = 'YES'
os.environ['USE_TF'] = 'NO'
ocr_model = ocr_predictor(pretrained=True)

def ocr_with_doctr(file):
    """Run the doctr predictor over a PDF and return the recognized text.

    The PDF is loaded with ``DocumentFile.from_pdf`` and handed to the
    module-level ``ocr_model``. The words of each recognized line are joined
    with single spaces and every line is terminated by a newline.

    Args:
        file: PDF input, forwarded unchanged to ``DocumentFile.from_pdf``.

    Returns:
        The recognized text, one OCR line per output line, or an empty
        string when the result contains no lines.
    """
    document = DocumentFile.from_pdf(file)
    prediction = ocr_model(document)

    # Walk pages -> blocks -> lines in document order, rendering each line.
    rendered_lines = [
        " ".join(word.value for word in line.words) + "\n"
        for page in prediction.pages
        for block in page.blocks
        for line in block.lines
    ]
    return "".join(rendered_lines)

# Lazily created PaddleOCR engine shared by every call to ocr_with_paddle.
_paddle_engine = None


def _get_paddle_engine():
    """Return the shared PaddleOCR engine, constructing it on first use."""
    global _paddle_engine
    if _paddle_engine is None:
        _paddle_engine = PaddleOCR(lang='en', use_angle_cls=True)
    return _paddle_engine


def ocr_with_paddle(img):
    """Recognize text in an image with PaddleOCR.

    The engine is built once and reused, rather than being re-created (and
    its models re-loaded) on every call.

    Args:
        img: Image input passed straight to ``PaddleOCR.ocr``.

    Returns:
        The recognized text fragments, each preceded by a single space (so a
        non-empty result starts with a space, as it always has), or an empty
        string when nothing was detected.
    """
    result = _get_paddle_engine().ocr(img)

    # The first entry holds the detections for the (single) input image. It
    # can be missing, None or empty when no text is found; treat all of those
    # as "no text" instead of failing on the index/len.
    detections = result[0] if result else None
    if not detections:
        return ''

    # Each detection is indexed as [box, (text, confidence)], matching the
    # result[0][i][1][0] access this function has always relied on.
    return ''.join(' ' + detection[1][0] for detection in detections)


def generate_text_from_image(img):
    """Return the text found in ``img`` by delegating to ``ocr_with_paddle``.

    Args:
        img: Image input, forwarded unchanged to ``ocr_with_paddle``.

    Returns:
        Whatever ``ocr_with_paddle`` returns for ``img``.
    """
    return ocr_with_paddle(img)

@app.post("/ocr/")
async def perform_ocr(file: UploadFile = File(...)):
    """Extract text from an uploaded PDF or image.

    Uploads whose filename ends in ``.pdf`` (case-insensitively) are handled
    by ``ocr_with_doctr``; anything else is decoded as an image with Pillow
    and handled by ``generate_text_from_image``.

    Args:
        file: The uploaded file from the multipart request.

    Returns:
        A dict of the form ``{"ocr_text": <recognized text>}``.

    Raises:
        HTTPException: 400 when the upload is empty or cannot be read as an
            image.
    """
    file_bytes = await file.read()
    if not file_bytes:
        raise HTTPException(status_code=400, detail="Uploaded file is empty.")

    # The filename is optional on an upload and its extension may be in any
    # case, so normalise before testing it.
    filename = (file.filename or "").lower()

    if filename.endswith('.pdf'):
        text_output = ocr_with_doctr(io.BytesIO(file_bytes))
    else:
        try:
            with Image.open(io.BytesIO(file_bytes)) as picture:
                # Palette, bilevel, CMYK, ... images turn into index, bool or
                # 4-channel arrays under np.array; convert those to RGB and
                # leave the common modes exactly as they were.
                if picture.mode in ("RGB", "RGBA", "L"):
                    pixels = picture
                else:
                    pixels = picture.convert("RGB")
                img = np.array(pixels)
        except OSError as exc:
            # Pillow signals unidentified or truncated image data with
            # OSError (UnidentifiedImageError is a subclass).
            raise HTTPException(
                status_code=400,
                detail="Uploaded file is not a readable image or PDF.",
            ) from exc
        text_output = generate_text_from_image(img)

    return {"ocr_text": text_output}