sblumenf committed on
Commit
f598e4b
·
verified ·
1 Parent(s): 7dec78f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -5
app.py CHANGED
@@ -2,7 +2,9 @@ import json
2
  import gradio as gr
3
  from pdfminer.high_level import extract_pages, extract_text
4
  from pdfminer.layout import LTTextBoxHorizontal, LTFigure, LTImage
5
- import mistletoe # for Markdown table generation (optional)
 
 
6
 
7
  def parse_pdf(pdf_file, output_format):
8
  with open(pdf_file, 'rb') as file:
@@ -37,10 +39,14 @@ def parse_pdf(pdf_file, output_format):
37
  download_data = json.dumps(json_data).encode("utf-8") # Encode JSON for download
38
 
39
  elif output_format == "Markdown":
40
- # Implement table conversion using mistletoe or other Markdown libraries
41
- markdown_tables = mistletoe.markdown(convert_table=True)(tables) # Example using mistletoe
42
 
43
- markdown_text = f"# Extracted Text\n\n{text}\n\n{markdown_tables}\n\n# Images\n"
 
 
 
 
44
  # Implement image conversion (e.g., relative paths or base64 encoding)
45
  # ...
46
  download_data = markdown_text.encode("utf-8")
@@ -49,7 +55,7 @@ def parse_pdf(pdf_file, output_format):
49
  # Implement table conversion using HTML table tags
50
  html_tables = "<table>" # Start of HTML table (replace with actual table structure)
51
  # ... (Implement table data conversion to HTML)
52
- html_tables += "</table>"
53
 
54
  html_text = f"<p>{text}</p>\n\n<h2>Tables</h2>\n{html_tables}\n\n<h2>Images</h2>\n"
55
  # Implement image conversion using `<img>` tag
 
2
  import gradio as gr
3
  from pdfminer.high_level import extract_pages, extract_text
4
  from pdfminer.layout import LTTextBoxHorizontal, LTFigure, LTImage
5
+
6
+ # Optional import for Markdown table generation (comment out if not needed)
7
+ # import mistletoe
8
 
9
  def parse_pdf(pdf_file, output_format):
10
  with open(pdf_file, 'rb') as file:
 
39
  download_data = json.dumps(json_data).encode("utf-8") # Encode JSON for download
40
 
41
  elif output_format == "Markdown":
42
+ # Implement table conversion using mistletoe or other Markdown libraries (uncomment if needed)
43
+ # markdown_tables = mistletoe.markdown(convert_table=True)(tables) # Example using mistletoe
44
 
45
+ markdown_text = f"# Extracted Text\n\n{text}\n\n# Tables\n"
46
+ # Implement table conversion (e.g., manually create Markdown table structure)
47
+ # ... (replace with your table conversion logic)
48
+ # markdown_text += markdown_tables # Uncomment if using mistletoe
49
+ markdown_text += "\n# Images\n"
50
  # Implement image conversion (e.g., relative paths or base64 encoding)
51
  # ...
52
  download_data = markdown_text.encode("utf-8")
 
55
  # Implement table conversion using HTML table tags
56
  html_tables = "<table>" # Start of HTML table (replace with actual table structure)
57
  # ... (Implement table data conversion to HTML)
58
+ # html_tables += "</table>"
59
 
60
  html_text = f"<p>{text}</p>\n\n<h2>Tables</h2>\n{html_tables}\n\n<h2>Images</h2>\n"
61
  # Implement image conversion using `<img>` tag