neuralleap committed on
Commit
2fdbea7
·
verified ·
1 Parent(s): 3ca07d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -3
app.py CHANGED
@@ -28,14 +28,15 @@ Extract the following fields as JSON:
28
  """
29
 
30
  def process_pdf(pdf_file):
31
- doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
 
32
  results = []
33
 
34
  for page_num in range(len(doc)):
35
  page = doc.load_page(page_num)
36
- pix = page.get_pixmap(dpi=200) # Adjust DPI if needed
37
 
38
- # Convert pixmap to PIL Image
39
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
40
 
41
  # Convert to base64 JPEG
 
28
  """
29
 
30
  def process_pdf(pdf_file):
31
+ # pdf_file is already bytes when using gr.File(type="binary")
32
+ doc = fitz.open(stream=pdf_file, filetype="pdf")
33
  results = []
34
 
35
  for page_num in range(len(doc)):
36
  page = doc.load_page(page_num)
37
+ pix = page.get_pixmap(dpi=200) # Use 150-200 DPI for balance
38
 
39
+ # Convert to PIL Image
40
  image = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
41
 
42
  # Convert to base64 JPEG