sblumenf committed on
Commit
c506d0d
·
verified ·
1 Parent(s): f15272f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -2,6 +2,7 @@ import json
2
  import gradio as gr
3
  from pdfminer.high_level import extract_pages, extract_text
4
  from pdfminer.layout import LTTextBoxHorizontal, LTFigure, LTImage
 
5
 
6
  def parse_pdf(pdf_file, output_format):
7
  with open(pdf_file, 'rb') as file:
@@ -21,7 +22,9 @@ def parse_pdf(pdf_file, output_format):
21
  # Here's an example of saving images with a unique filename
22
  image_data = element # Replace with your image extraction logic
23
  image_filename = f"extracted_image_{len(images)}.jpg"
24
- # ... (Implement image saving logic using the filename)
 
 
25
  images.append({"filename": image_filename}) # Add filename to image data
26
 
27
  # Implement table extraction logic (e.g., using heuristics or advanced techniques)
@@ -58,7 +61,11 @@ def parse_pdf(pdf_file, output_format):
58
  # ... (e.g., use `<img>` tags with image source)
59
  download_data = html_text.encode("utf-8")
60
 
61
- return text, download_data
 
 
 
 
62
 
63
  iface = gr.Interface(
64
  fn=parse_pdf,
 
2
  import gradio as gr
3
  from pdfminer.high_level import extract_pages, extract_text
4
  from pdfminer.layout import LTTextBoxHorizontal, LTFigure, LTImage
5
+ import os # Import os for file path manipulation
6
 
7
  def parse_pdf(pdf_file, output_format):
8
  with open(pdf_file, 'rb') as file:
 
22
  # Here's an example of saving images with a unique filename
23
  image_data = element # Replace with your image extraction logic
24
  image_filename = f"extracted_image_{len(images)}.jpg"
25
+ # Save the image using the filename
26
+ with open(image_filename, 'wb') as image_file:
27
+ image_file.write(image_data) # Assuming image_data is binary data
28
  images.append({"filename": image_filename}) # Add filename to image data
29
 
30
  # Implement table extraction logic (e.g., using heuristics or advanced techniques)
 
61
  # ... (e.g., use `<img>` tags with image source)
62
  download_data = html_text.encode("utf-8")
63
 
64
+ # Create a temporary directory to store downloaded files (optional)
65
+ # download_dir = tempfile.mkdtemp() # Uncomment if needed for temporary storage
66
+
67
+ # Return the extracted text and the filename (or path) for download
68
+ return text, os.path.join(os.getcwd(), images[0]["filename"]) # Example using first image
69
 
70
  iface = gr.Interface(
71
  fn=parse_pdf,