File size: 1,485 Bytes
e2d728a
 
8e024f6
ba47b56
 
 
e2d728a
ba47b56
 
 
 
266215f
ba47b56
 
 
 
 
 
cfdee1a
8604d96
 
ba47b56
 
 
8604d96
266215f
 
 
e2d728a
4a9e7b8
e2d728a
a07d796
8604d96
 
e2d728a
a07d796
8604d96
a07d796
 
 
 
 
8604d96
4a9e7b8
a07d796
e2d728a
e665966
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import gradio as gr
from docling.document_converter import DocumentConverter
import spaces
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.config.parser import ConfigParser
from marker.output import text_from_rendered

# Docling backend: one shared converter, built once at import time.
docling_converter = DocumentConverter()

# Marker backend: every constructor argument is derived from a ConfigParser
# that is given an empty options dict.
config_parser = ConfigParser({})
_marker_kwargs = {
    "config": config_parser.generate_config_dict(),
    # presumably this is where Marker's models get loaded — confirm
    "artifact_dict": create_model_dict(),
    "processor_list": config_parser.get_processors(),
    "renderer": config_parser.get_renderer(),
}
marker_converter = PdfConverter(**_marker_kwargs)

def convert_document(file, method):
    """Convert an uploaded document to text with the selected backend.

    Args:
        file: The upload handed over by the Gradio ``File`` component. Either
            an object exposing the on-disk path as ``.name`` or a plain path
            string is accepted; ``None`` means nothing was uploaded.
        method: Backend label, ``"Docling"`` or ``"Marker"``.

    Returns:
        The Docling Markdown export or the text extracted from Marker's
        rendered output, ``'unknown method'`` for any other backend label, or
        ``'no file uploaded'`` when a known backend is chosen without a file.
    """
    if method not in ("Docling", "Marker"):
        return 'unknown method'
    if file is None:
        # Clicking "Convert" before uploading passes None; report that
        # instead of failing on `None.name`.
        return 'no file uploaded'

    # Objects with a `.name` are handled exactly as before; a bare path
    # string (no `.name`) is passed through unchanged.
    path = getattr(file, "name", file)

    if method == "Docling":
        result = docling_converter.convert(path)
        return result.document.export_to_markdown()

    rendered = marker_converter(path)
    # text_from_rendered returns a 3-tuple; only the text is shown in the UI.
    text, _, _ = text_from_rendered(rendered)
    return text

# UI layout: heading and blurb, then upload + backend picker, the result box,
# and a button that runs the conversion.
with gr.Blocks() as app:
    gr.Markdown("# Document Converter")
    gr.Markdown("Upload a document, choose the backend, and get the converted text with metadata.")

    uploaded_file = gr.File(label="Upload Document")
    backend_choice = gr.Radio(["Docling", "Marker"], label="Choose Conversion Backend")
    converted_box = gr.Textbox(label="Converted Document")

    run_button = gr.Button("Convert")
    # The button feeds (file, backend) to convert_document and writes the
    # returned string into the textbox.
    run_button.click(
        fn=convert_document,
        inputs=[uploaded_file, backend_choice],
        outputs=[converted_box],
    )

# Start the server with debug mode on and errors surfaced in the UI.
app.launch(debug=True, show_error=True)