File size: 1,561 Bytes
e2d728a
 
8e94e51
e2d728a
8e94e51
 
 
e2d728a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a07d796
 
 
 
e2d728a
a07d796
 
 
 
 
 
 
 
 
 
 
 
e2d728a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import gradio as gr
from docling.document_converter import DocumentConverter
import spaces



@spaces.GPU
def convert_document(file, output_format):
    """Convert an uploaded document with Docling and collect basic metadata.

    Args:
        file: The upload handed over by the ``gr.File`` input. Either an
            object exposing its on-disk path as ``.name`` or a plain path
            string is accepted. ``None`` means nothing was uploaded.
        output_format: ``"Markdown"`` or ``"JSON"``. Any other value
            (including ``None``) yields the text ``"Unsupported format"``.

    Returns:
        A ``(converted_text, metadata)`` tuple. ``metadata`` is a dict with
        the keys ``"Title"``, ``"Author"``, ``"Language"`` and
        ``"References"``; a value is ``None`` when the converted document
        does not expose the matching attribute. When no file was uploaded
        the tuple is ``("No document uploaded.", {})``.
    """
    # Local import keeps this block self-contained; only the JSON fallback
    # below needs it.
    import json

    # Clicking "Convert" without an upload passes None; answer with a
    # message instead of failing on `file.name`.
    if file is None:
        return "No document uploaded.", {}

    # Use `.name` when the upload object has one, otherwise treat the value
    # itself as the path.
    source_path = getattr(file, "name", file)

    # Load document and convert it using Docling
    converter = DocumentConverter()
    result = converter.convert(source_path)
    document = result.document

    # Choose the output format
    if output_format == "Markdown":
        converted_text = document.export_to_markdown()
    elif output_format == "JSON":
        # NOTE(review): `export_to_json` does not appear in the documented
        # DoclingDocument API - confirm against the installed version.
        # It is still used when available; otherwise the dict export is
        # serialised here.
        export_json = getattr(document, "export_to_json", None)
        if callable(export_json):
            converted_text = export_json()
        else:
            converted_text = json.dumps(
                document.export_to_dict(), ensure_ascii=False, indent=2
            )
    else:
        converted_text = "Unsupported format"

    # Extract metadata.
    # NOTE(review): the document model may not declare these attributes -
    # confirm. Reading them with a default keeps a successful conversion
    # from being discarded because of a missing metadata field.
    metadata = {
        "Title": getattr(document, "title", None),
        "Author": getattr(document, "author", None),
        "Language": getattr(document, "language", None),
        "References": getattr(document, "references", None),
    }

    return converted_text, metadata

# Define the Gradio interface: an upload field and a format selector feed
# convert_document, whose two return values fill the text and JSON outputs.
with gr.Blocks() as app:
    gr.Markdown("# Document Converter with Docling")
    gr.Markdown("Upload a document, choose the output format, and get the converted text with metadata.")

    file_input = gr.File(label="Upload Document")
    # Preselect a format so that pressing "Convert" right after uploading
    # does not send an empty choice and come back as "Unsupported format".
    format_input = gr.Radio(
        ["Markdown", "JSON"],
        value="Markdown",
        label="Choose Output Format",
    )
    output_text = gr.Textbox(label="Converted Document")
    output_metadata = gr.JSON(label="Metadata")

    # Define the process button and bind it to the function; the order of
    # `outputs` matches the (converted_text, metadata) tuple it returns.
    convert_button = gr.Button("Convert")
    convert_button.click(
        convert_document,
        inputs=[file_input, format_input],
        outputs=[output_text, output_metadata],
    )

app.launch()