Hammad712 committed on
Commit
6dd2bd7
·
verified ·
1 Parent(s): 4e795bb

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +34 -16
main.py CHANGED
@@ -1,12 +1,13 @@
1
  import os
2
  import io
 
3
  import tempfile
4
  import PIL.Image
5
  from fastapi import FastAPI, File, UploadFile, HTTPException
6
- from fastapi.responses import FileResponse
7
  from pdf2image import convert_from_bytes
8
  from google import genai
9
  from google.genai import types
 
10
 
11
  app = FastAPI(title="PDF/Image Text Extraction API")
12
 
@@ -21,14 +22,38 @@ client = genai.Client(api_key=API_KEY)
21
  def extract_text_from_image(img):
22
  """
23
  Extracts text from a PIL image using the Google GenAI API.
 
24
  """
25
- response = client.models.generate_content(
26
- model="gemini-2.0-flash",
27
- contents=["Extract the text from the image. Do not write anything except the extracted content", img]
28
- )
29
- return response.text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- @app.post("/upload", summary="Upload a PDF or image file", response_description="Returns a Markdown file with the extracted text")
32
  async def upload_file(file: UploadFile = File(...)):
33
  if not file.filename:
34
  raise HTTPException(status_code=400, detail="No file provided")
@@ -57,16 +82,9 @@ async def upload_file(file: UploadFile = File(...)):
57
 
58
  output_text += extract_text_from_image(img) + "\n\n"
59
 
60
- # Save the extracted text to a temporary Markdown file.
61
- temp_md = tempfile.NamedTemporaryFile(delete=False, suffix=".md")
62
- with open(temp_md.name, "w", encoding="utf-8") as md_file:
63
- md_file.write(output_text)
64
-
65
- # Return the file as a downloadable response.
66
- return FileResponse(temp_md.name, filename="output.md", media_type="text/markdown")
67
 
68
- # --- API Endpoints ---
69
  @app.get("/", summary="Health Check")
70
  async def root():
71
  return {"message": "API is up and running."}
72
-
 
1
  import os
2
  import io
3
+ import time
4
  import tempfile
5
  import PIL.Image
6
  from fastapi import FastAPI, File, UploadFile, HTTPException
 
7
  from pdf2image import convert_from_bytes
8
  from google import genai
9
  from google.genai import types
10
+ from google.genai.errors import ClientError
11
 
12
  app = FastAPI(title="PDF/Image Text Extraction API")
13
 
 
22
  def extract_text_from_image(img):
23
  """
24
  Extracts text from a PIL image using the Google GenAI API.
25
+ Includes error handling for RESOURCE_EXHAUSTED errors.
26
  """
27
+ max_retries = 3
28
+ for attempt in range(max_retries):
29
+ try:
30
+ response = client.models.generate_content(
31
+ model="gemini-2.0-flash",
32
+ contents=[
33
+ "Extract the text from the image. Do not write anything except the extracted content",
34
+ img,
35
+ ]
36
+ )
37
+ return response.text
38
+ except ClientError as e:
39
+ # Check if error code is 429 (RESOURCE_EXHAUSTED)
40
+ if e.status_code == 429:
41
+ if attempt < max_retries - 1:
42
+ # Wait for an exponentially increasing delay before retrying.
43
+ time.sleep(2 ** attempt)
44
+ continue
45
+ else:
46
+ raise HTTPException(
47
+ status_code=503,
48
+ detail="API resource exhausted. Please try again later."
49
+ )
50
+ else:
51
+ raise HTTPException(
52
+ status_code=500,
53
+ detail=f"Error processing image: {str(e)}"
54
+ )
55
 
56
+ @app.post("/upload", summary="Upload a PDF or image file", response_description="Returns a JSON response with the extracted text")
57
  async def upload_file(file: UploadFile = File(...)):
58
  if not file.filename:
59
  raise HTTPException(status_code=400, detail="No file provided")
 
82
 
83
  output_text += extract_text_from_image(img) + "\n\n"
84
 
85
+ # Return the extracted text as JSON.
86
+ return {"extracted_text": output_text}
 
 
 
 
 
87
 
 
88
  @app.get("/", summary="Health Check")
89
  async def root():
90
  return {"message": "API is up and running."}