asynchronousai committed on
Commit
8604d96
·
verified ·
1 Parent(s): cfdee1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -30
app.py CHANGED
@@ -1,50 +1,44 @@
1
  import gradio as gr
2
  from docling.document_converter import DocumentConverter
3
- from docling_core.transforms.chunker import HierarchicalChunker
4
  import spaces
5
 
6
 
7
- def convert_document(file, output_format):
8
- # Load document and convert it using Docling
9
- converter = DocumentConverter()
10
- result = converter.convert(file.name)
11
-
12
- # Check available attributes in DoclingDocument
13
- available_attributes = dir(result.document)
14
- document = result.document
15
-
16
-
17
- # Choose the output format
18
- if output_format == "Markdown":
19
  converted_text = result.document.export_to_markdown()
20
- elif output_format == "JSON":
21
- converted_text = result.document.export_to_json()
 
 
 
 
 
22
  else:
23
- converted_text = "Unsupported format"
24
-
25
- # Placeholder metadata extraction based on available attributes
26
- metadata = {
27
- "Available Attributes": available_attributes,
28
- "Chunked": list(HierarchicalChunker().chunk(document))
29
- }
30
-
31
- return converted_text, metadata
32
 
33
- # Define the Gradio interface using the new component syntax
34
  with gr.Blocks() as app:
35
- gr.Markdown("# Document Converter with Docling")
36
- gr.Markdown("Upload a document, choose the output format, and get the converted text with metadata.")
37
 
38
  file_input = gr.File(label="Upload Document")
39
- format_input = gr.Radio(["Markdown", "JSON"], label="Choose Output Format")
40
  output_text = gr.Textbox(label="Converted Document")
41
  output_metadata = gr.JSON(label="Metadata")
42
 
43
- # Define the process button and bind it to the function
44
  convert_button = gr.Button("Convert")
45
  convert_button.click(
46
  convert_document,
47
- inputs=[file_input, format_input],
48
  outputs=[output_text, output_metadata]
49
  )
50
 
 
1
  import gradio as gr
2
  from docling.document_converter import DocumentConverter
 
3
  import spaces
4
 
5
 
6
def convert_document(file, method):
    """Convert an uploaded document to Markdown with the selected backend.

    Args:
        file: Upload handed over by the ``gr.File`` input. Either an object
            exposing the on-disk path as ``.name`` or a plain path string;
            ``None`` when nothing has been uploaded.
        method: Backend label chosen in the UI ("Docling" or "Marker").

    Returns:
        A ``(text, metadata)`` tuple. ``text`` is the Markdown export on
        success, otherwise a short status message. ``metadata`` is a dict
        for a successful Docling conversion and ``None`` in every other case.
    """
    if method == "Docling":
        # Clicking "Convert" before uploading passes None; report that
        # instead of failing with AttributeError on ``file.name``.
        if file is None:
            return 'no file uploaded', None

        # Accept both upload objects (path in ``.name``) and bare path strings.
        source = getattr(file, "name", file)
        result = DocumentConverter().convert(source)
        document = result.document

        # Placeholder metadata: the attribute names available on the document.
        metadata = {"Available Attributes": dir(document)}
        return document.export_to_markdown(), metadata

    # The metadata slot feeds a gr.JSON output. A str value there is parsed
    # as JSON text and '' is not valid JSON, so use None for "no metadata".
    if method == "Marker":
        return 'unsupported method', None
    return 'unknown method', None
 
 
 
 
 
 
 
 
28
 
 
29
  with gr.Blocks() as app:
30
+ gr.Markdown("# Document Converter")
31
+ gr.Markdown("Upload a document, choose the backend, and get the converted text with metadata.")
32
 
33
  file_input = gr.File(label="Upload Document")
34
+ method_input = gr.Radio(["Docling", "Marker"], label="Choose Conversion Backend")
35
  output_text = gr.Textbox(label="Converted Document")
36
  output_metadata = gr.JSON(label="Metadata")
37
 
 
38
  convert_button = gr.Button("Convert")
39
  convert_button.click(
40
  convert_document,
41
+ inputs=[file_input, method_input],
42
  outputs=[output_text, output_metadata]
43
  )
44