File size: 3,567 Bytes
23c697b
 
6dd2bd7
23c697b
cfa0911
4496186
23c697b
 
6dd2bd7
23c697b
 
 
cfa0911
 
 
 
 
 
 
 
23c697b
 
 
 
 
 
 
 
 
 
 
6dd2bd7
23c697b
6dd2bd7
 
 
 
 
 
 
 
 
 
 
 
4496186
 
 
6dd2bd7
4496186
6dd2bd7
 
 
 
 
 
 
 
 
 
 
23c697b
4496186
23c697b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4496186
 
4e795bb
 
 
4496186
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import asyncio
import io
import os
import time

import PIL.Image
from fastapi import FastAPI, File, UploadFile, HTTPException, Request
from fastapi.responses import JSONResponse
from google import genai
from google.genai.errors import ClientError
from pdf2image import convert_from_bytes

# Application instance; the exception handler and the routes defined below in
# this module are registered on it through its decorators.
app = FastAPI(title="PDF/Image Text Extraction API")

# Fallback handler registered for Exception so that failures are reported to
# the client as JSON.
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Build a 500 JSON response carrying the exception text under "detail".

    NOTE(review): the raw exception message is sent to the client; confirm
    that exposing it is acceptable for this service.
    """
    payload = {"detail": str(exc)}
    return JSONResponse(status_code=500, content=payload)

# The GenAI API key comes from the API_KEY environment variable; loading the
# module fails with ValueError when it is missing or empty.
API_KEY = os.environ.get("API_KEY")
if API_KEY is None or API_KEY == "":
    raise ValueError("API_KEY environment variable is not set")

# Module-level GenAI client built from that key.
client = genai.Client(api_key=API_KEY)

def extract_text_from_image(img, max_retries=3) -> str:
    """
    Extract the text of a PIL image using the Google GenAI API.

    The request is retried with exponential backoff (1s, 2s, 4s, ...) while
    the API rejects it with HTTP 429 (RESOURCE_EXHAUSTED).

    Args:
        img: Image sent to the model together with the extraction prompt.
        max_retries: Total number of attempts made before giving up on 429
            responses. Values below 1 are treated as 1.

    Returns:
        The text produced by the model, or an empty string when the response
        carries no text.

    Raises:
        HTTPException: 503 when every attempt was rejected with 429, 500 for
            any other ClientError.

    Note:
        This function blocks (network call plus ``time.sleep`` backoff), so
        async callers should run it in a worker thread.
    """
    attempts = max(1, max_retries)
    for attempt in range(attempts):
        try:
            response = client.models.generate_content(
                model="gemini-2.0-flash",
                contents=[
                    "Extract the text from the image. Do not write anything except the extracted content",
                    img,
                ]
            )
        except ClientError as e:
            # The HTTP status lives on the ``code`` attribute of the error;
            # ``args[0]`` is normally the formatted message string, so it is
            # only used as a fallback when it really is an int.
            error_code = getattr(e, "code", None)
            if error_code is None and e.args and isinstance(e.args[0], int):
                error_code = e.args[0]

            if error_code != 429:
                raise HTTPException(
                    status_code=500,
                    detail=f"Error processing image: {str(e)}"
                ) from e

            if attempt == attempts - 1:
                raise HTTPException(
                    status_code=503,
                    detail="API resource exhausted. Please try again later."
                ) from e

            time.sleep(2 ** attempt)  # Exponential backoff before retrying
            continue

        # ``response.text`` can be None when the model returns no text parts;
        # normalise so callers can always treat the result as a string.
        return response.text or ""

    # Unreachable: the loop runs at least once and always returns or raises.
    return ""

@app.post("/upload", summary="Upload a PDF or image file", response_description="Returns extracted text as JSON")
async def upload_file(file: UploadFile = File(...)):
    """
    Extract text from an uploaded PDF or image and return it as JSON.

    Files whose name ends with ".pdf" (case-insensitive) are rendered to one
    image per page at 200 dpi and each page is extracted separately, prefixed
    with a "### Page N" heading. Any other file is opened as a single image.

    The blocking steps (PDF rendering and the GenAI call with its retry
    backoff) run in worker threads so the event loop stays responsive.

    Returns:
        JSONResponse with the extracted text under "extracted_text".

    Raises:
        HTTPException: 400 when no filename is given, the upload is empty, or
            the file cannot be opened as an image; 500 when the PDF cannot be
            converted; whatever extract_text_from_image raises otherwise.
    """
    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    # Read file content.
    file_contents = await file.read()
    if not file_contents:
        raise HTTPException(status_code=400, detail="Uploaded file is empty")

    if file.filename.lower().endswith(".pdf"):
        try:
            # Convert PDF bytes to images off the event loop.
            images = await asyncio.to_thread(
                convert_from_bytes, file_contents, dpi=200
            )
        except Exception as e:
            raise HTTPException(
                status_code=500, detail=f"Error converting PDF: {str(e)}"
            ) from e

        # Process each page in order; a page without text yields an empty body.
        sections = []
        for idx, img in enumerate(images, start=1):
            page_text = (await asyncio.to_thread(extract_text_from_image, img)) or ""
            sections.append(f"### Page {idx}\n\n{page_text}\n\n")
        output_text = "".join(sections)
    else:
        try:
            # Process the file as an image.
            img = PIL.Image.open(io.BytesIO(file_contents))
        except Exception as e:
            raise HTTPException(
                status_code=400, detail="Uploaded file is not a valid image"
            ) from e

        image_text = (await asyncio.to_thread(extract_text_from_image, img)) or ""
        output_text = image_text + "\n\n"

    # Return the extracted text in a JSON response.
    return JSONResponse(content={"extracted_text": output_text})

@app.get("/", summary="Health Check")
async def root():
    """Answer the health check with a fixed JSON status message."""
    status = {"message": "API is up and running."}
    return JSONResponse(content=status)