Nechba committed on
Commit
326d072
·
verified ·
1 Parent(s): 743ccae

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +17 -4
utils.py CHANGED
@@ -8,15 +8,28 @@ from PyPDF2 import PdfReader
8
  import re
9
  import csv
10
  from PIL import Image
11
- from pdf2image import convert_from_bytes
 
12
 
13
  def configure_gemini(api_key: str):
14
  """Configure Gemini API with the provided key"""
15
  genai.configure(api_key=api_key)
16
 
17
- def pdf_to_images(pdf_bytes: bytes) -> list:
18
- """Convert PDF bytes to list of PIL Images"""
19
- return convert_from_bytes(pdf_bytes)
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  def analyze_single_document(images: list, prompt: str) -> dict:
22
  """Analyze a single document and return results"""
 
8
  import re
9
  import csv
10
  from PIL import Image
11
+ import fitz # PyMuPDF
12
+ from PIL import Image
13
 
14
  def configure_gemini(api_key: str):
15
  """Configure Gemini API with the provided key"""
16
  genai.configure(api_key=api_key)
17
 
18
+ # def pdf_to_images(pdf_bytes: bytes) -> list:
19
+ # """Convert PDF bytes to list of PIL Images"""
20
+ # return convert_from_bytes(pdf_bytes)
21
+
22
+
23
+
24
def pdf_to_images(pdf_bytes: bytes) -> list[Image.Image]:
    """Convert PDF to PIL Images using PyMuPDF (no poppler needed).

    Args:
        pdf_bytes: Raw content of a PDF file.

    Returns:
        One RGB image per page, in page order, rendered at PyMuPDF's
        default pixmap resolution.
    """
    images = []
    # The context manager closes the document even if rendering a page fails;
    # the original left the document open after returning.
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        for page in doc:
            # alpha=False keeps three samples per pixel, matching the "RGB"
            # mode passed to Image.frombytes below.
            pix = page.get_pixmap(alpha=False)
            # frombytes copies the pixel data, so the images stay valid after
            # the document is closed.
            images.append(
                Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
            )
    return images
33
 
34
  def analyze_single_document(images: list, prompt: str) -> dict:
35
  """Analyze a single document and return results"""