Spaces:
Running
Running
File size: 4,155 Bytes
23c697b 6dd2bd7 23c697b cfa0911 4496186 23c697b 6dd2bd7 23c697b cfa0911 23c697b 6dd2bd7 23c697b 6dd2bd7 fb4c874 6dd2bd7 4496186 6dd2bd7 4496186 6dd2bd7 23c697b 4496186 23c697b 4496186 4e795bb 4496186 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
import os
import io
import time
import PIL.Image
from fastapi import FastAPI, File, UploadFile, HTTPException, Request
from fastapi.responses import JSONResponse
from pdf2image import convert_from_bytes
from google import genai
from google.genai.errors import ClientError
app = FastAPI(title="PDF/Image Text Extraction API")


# Catch-all handler: exceptions routed here are reported to the client as JSON.
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """Build a JSON 500 response whose ``detail`` is the exception's text."""
    payload = {"detail": str(exc)}
    return JSONResponse(content=payload, status_code=500)
# The GenAI API key comes from the environment; a missing or empty value
# aborts module import with a ValueError.
API_KEY = os.environ.get("API_KEY")
if API_KEY is None or API_KEY == "":
    raise ValueError("API_KEY environment variable is not set")

# Module-level GenAI client, created once at import time.
client = genai.Client(api_key=API_KEY)
# Instruction sent alongside every image. Kept at module level so the
# multi-line literal is not affected by function indentation.
_EXTRACTION_PROMPT = """Extract all visible text from this image and preserve the original layout and formatting as accurately as possible.
- Maintain line breaks, indentation, and paragraph spacing.
- Do not merge or reflow text from multiple lines into a single line.
- Preserve bullet points, numbering, punctuation, and symbols exactly as shown.
- Reproduce alignment (left/center/right) where possible.
- For tabular or columnar data, preserve column spacing and structure.
- Do not summarize or interpret the content. Just return the raw extracted text exactly as it appears in the image.
Return only the extracted content. Do not add explanations, headers, or any additional comments."""


def extract_text_from_image(img):
    """Extract text from a PIL image using the Google GenAI API.

    The request is attempted up to three times. A 429 (resource exhausted)
    response triggers an exponential backoff (1s, then 2s) before the next
    attempt; any other client error fails immediately.

    Args:
        img: The PIL image to send to the model.

    Returns:
        The text returned by the model, or an empty string when the response
        carries no text.

    Raises:
        HTTPException: 503 when the API is still rate-limited after the last
            attempt; 500 for any other ``ClientError``.
    """
    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = client.models.generate_content(
                model="gemini-2.0-flash",
                contents=[_EXTRACTION_PROMPT, img],
            )
        except ClientError as e:
            # The SDK's API errors carry the HTTP status on ``code``;
            # ``args[0]`` is the formatted message string, so looking for an
            # int there alone would never detect a 429. The args check is
            # kept as a fallback for exceptions built with a bare int.
            error_code = getattr(e, "code", None)
            if error_code is None and e.args and isinstance(e.args[0], int):
                error_code = e.args[0]

            if error_code != 429:
                raise HTTPException(
                    status_code=500,
                    detail=f"Error processing image: {str(e)}"
                ) from e
            if attempt >= max_retries - 1:
                raise HTTPException(
                    status_code=503,
                    detail="API resource exhausted. Please try again later."
                ) from e
            time.sleep(2 ** attempt)  # Exponential backoff before retrying
        else:
            # ``text`` may be None when the response has no text parts;
            # callers concatenate the result, so always hand back a str.
            return response.text or ""
@app.post("/upload", summary="Upload a PDF or image file", response_description="Returns extracted text as JSON")
async def upload_file(file: UploadFile = File(...)):
    """Extract text from an uploaded PDF or image and return it as JSON.

    A filename ending in ``.pdf`` (case-insensitive) is rasterised page by
    page at 200 dpi and each page's text is emitted under a ``### Page N``
    heading. Any other upload is treated as a single image.

    Args:
        file: The uploaded file.

    Returns:
        A JSON response of the form ``{"extracted_text": <str>}``.

    Raises:
        HTTPException: 400 when no filename is supplied or the upload cannot
            be decoded as an image; 500 when PDF conversion fails. Errors
            raised by ``extract_text_from_image`` propagate unchanged.
    """
    # Local import keeps this handler self-contained. The blocking steps below
    # (PDF rasterisation, the GenAI call and its retry sleeps) run in a worker
    # thread so they do not stall the event loop for other requests.
    from fastapi.concurrency import run_in_threadpool

    if not file.filename:
        raise HTTPException(status_code=400, detail="No file provided")

    # Read file content.
    file_contents = await file.read()

    if file.filename.lower().endswith(".pdf"):
        try:
            # Convert PDF bytes to images.
            images = await run_in_threadpool(convert_from_bytes, file_contents, dpi=200)
        except Exception as e:
            raise HTTPException(status_code=500, detail=f"Error converting PDF: {str(e)}") from e

        # Process each page.
        sections = []
        for idx, img in enumerate(images, start=1):
            page_text = await run_in_threadpool(extract_text_from_image, img)
            # Guard against a missing result so "None" is never emitted.
            sections.append(f"### Page {idx}\n\n{page_text or ''}\n\n")
        output_text = "".join(sections)
    else:
        try:
            # Process the file as an image. ``open`` only reads the header,
            # so force a full decode here to reject corrupt data with a 400.
            img = PIL.Image.open(io.BytesIO(file_contents))
            img.load()
        except Exception as e:
            raise HTTPException(status_code=400, detail="Uploaded file is not a valid image") from e

        image_text = await run_in_threadpool(extract_text_from_image, img)
        output_text = (image_text or "") + "\n\n"

    # Return the extracted text in a JSON response.
    return JSONResponse(content={"extracted_text": output_text})
@app.get("/", summary="Health Check")
async def root():
    """Health-check endpoint: reply with a fixed JSON status message."""
    status = {"message": "API is up and running."}
    return JSONResponse(content=status)
|