File size: 4,155 Bytes
23c697b
 
6dd2bd7
23c697b
cfa0911
4496186
23c697b
 
6dd2bd7
23c697b
 
 
cfa0911
 
 
 
 
 
 
 
23c697b
 
 
 
 
 
 
 
 
 
 
6dd2bd7
23c697b
6dd2bd7
 
 
 
 
 
fb4c874
 
 
 
 
 
 
 
 
 
 
6dd2bd7
 
 
 
4496186
 
 
6dd2bd7
4496186
6dd2bd7
 
 
 
 
 
 
 
 
 
 
23c697b
4496186
23c697b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4496186
 
4e795bb
 
 
4496186
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import io
import time
import PIL.Image
from fastapi import FastAPI, File, UploadFile, HTTPException, Request
from fastapi.responses import JSONResponse
from pdf2image import convert_from_bytes
from google import genai
from google.genai.errors import ClientError

app = FastAPI(title="PDF/Image Text Extraction API")

# Global exception handler to always return JSON responses
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Answer with a JSON 500 for exceptions routed to this handler.

    Registered on ``app`` for ``Exception``. The response body is
    ``{"detail": <str(exc)>}``; ``request`` is accepted but not used.
    """
    payload = {"detail": str(exc)}
    return JSONResponse(content=payload, status_code=500)

# The GenAI credential is read from the API_KEY environment variable.
# Startup is aborted when it is missing or empty.
API_KEY = os.environ.get("API_KEY")
if API_KEY is None or API_KEY == "":
    raise ValueError("API_KEY environment variable is not set")

# Module-level GenAI client, created once at import time.
client = genai.Client(api_key=API_KEY)

def extract_text_from_image(img, *, max_retries=3):
    """
    Extract text from a PIL image using the Google GenAI API.

    The image is sent together with a layout-preserving extraction prompt to
    the ``gemini-2.0-flash`` model. Rate-limit errors (HTTP 429) are retried
    with exponential backoff (1s, 2s, 4s, ...).

    Note: this function is synchronous and sleeps between retries, so it
    blocks the calling thread for the duration of the request and backoff.

    Args:
        img: The image to process; forwarded unchanged to ``generate_content``.
        max_retries: Total number of attempts made when the API keeps
            answering 429. Values below 1 are treated as 1.

    Returns:
        The text reported by the model, or ``""`` when the response has no text.

    Raises:
        HTTPException: 503 when every attempt was rate limited, 500 for any
            other ``ClientError``.
    """
    prompt = """Extract all visible text from this image and preserve the original layout and formatting as accurately as possible.

- Maintain line breaks, indentation, and paragraph spacing.
- Do not merge or reflow text from multiple lines into a single line.
- Preserve bullet points, numbering, punctuation, and symbols exactly as shown.
- Reproduce alignment (left/center/right) where possible.
- For tabular or columnar data, preserve column spacing and structure.
- Do not summarize or interpret the content. Just return the raw extracted text exactly as it appears in the image.

Return only the extracted content. Do not add explanations, headers, or any additional comments."""

    attempts = max(1, max_retries)
    for attempt in range(attempts):
        try:
            response = client.models.generate_content(
                model="gemini-2.0-flash",
                contents=[prompt, img],
            )
        except ClientError as e:
            # The SDK exposes the HTTP status on ``e.code``; ``e.args[0]`` is a
            # formatted message string, so reading the code from ``args`` alone
            # would never match 429. The ``args`` check is kept as a fallback.
            error_code = getattr(e, "code", None)
            if not isinstance(error_code, int):
                error_code = e.args[0] if e.args and isinstance(e.args[0], int) else None

            if error_code != 429:
                raise HTTPException(
                    status_code=500,
                    detail=f"Error processing image: {str(e)}"
                ) from e

            if attempt == attempts - 1:
                # Out of attempts: report the upstream quota problem as 503.
                raise HTTPException(
                    status_code=503,
                    detail="API resource exhausted. Please try again later."
                ) from e

            time.sleep(2 ** attempt)  # Exponential backoff before retrying
            continue

        # ``response.text`` may be None when the model returned no text parts;
        # normalise to an empty string so callers can always concatenate.
        return response.text or ""

def _load_image(data):
    """Open *data* as a PIL image and force a full decode.

    ``PIL.Image.open`` is lazy, so ``load()`` is called to make corrupt or
    truncated files fail here instead of later during text extraction.
    """
    img = PIL.Image.open(io.BytesIO(data))
    img.load()
    return img


# POST /upload: extract text from an uploaded PDF or image.
#
# - Files whose name ends in ".pdf" (case-insensitive) are rendered to one
#   image per page at 200 dpi; each page's text is emitted under a
#   "### Page N" heading.
# - Any other file is treated as a single image.
#
# Responds with JSON {"extracted_text": <str>}.
# Raises HTTPException 400 for a missing filename or an invalid image, and
# 500 when the PDF cannot be converted. Errors raised by
# extract_text_from_image propagate unchanged.
#
# Documentation is kept in comments rather than a docstring because FastAPI
# publishes an endpoint docstring as the operation description.
@app.post("/upload", summary="Upload a PDF or image file", response_description="Returns extracted text as JSON")
async def upload_file(file: UploadFile = File(...)):
    # Function-scope import so this block does not depend on the file header.
    import asyncio

    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    # Read file content.
    file_contents = await file.read()

    if file.filename.lower().endswith(".pdf"):
        try:
            # PDF rendering is blocking work; run it in a worker thread so the
            # event loop keeps serving other requests.
            images = await asyncio.to_thread(convert_from_bytes, file_contents, dpi=200)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error converting PDF: {str(e)}") from e

        # Process each page in order. The extraction call blocks (network I/O
        # and retry sleeps), so it is also moved off the event loop.
        sections = []
        for idx, img in enumerate(images, start=1):
            page_text = await asyncio.to_thread(extract_text_from_image, img)
            # Guard against a missing (None) result so "None" is never emitted.
            sections.append(f"### Page {idx}\n\n{page_text or ''}\n\n")
        output_text = "".join(sections)
    else:
        try:
            # Decode eagerly so an invalid upload is rejected as a 400 here.
            img = await asyncio.to_thread(_load_image, file_contents)
        except Exception as e:
            raise HTTPException(status_code=400, detail="Uploaded file is not a valid image") from e

        image_text = await asyncio.to_thread(extract_text_from_image, img)
        output_text = f"{image_text or ''}\n\n"

    # Return the extracted text in a JSON response.
    return JSONResponse(content={"extracted_text": output_text})

# GET /: health check that returns a fixed JSON status message.
@app.get("/", summary="Health Check")
async def root():
    status = {"message": "API is up and running."}
    return JSONResponse(content=status)