Spaces:
Running
Running
File size: 1,485 Bytes
e2d728a 8e024f6 ba47b56 e2d728a ba47b56 266215f ba47b56 cfdee1a 8604d96 ba47b56 8604d96 266215f e2d728a 4a9e7b8 e2d728a a07d796 8604d96 e2d728a a07d796 8604d96 a07d796 8604d96 4a9e7b8 a07d796 e2d728a e665966 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
import gradio as gr
from docling.document_converter import DocumentConverter
import spaces
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.config.parser import ConfigParser
# Docling backend: one converter instance, created when the module is loaded.
docling_converter = DocumentConverter()

# Marker backend: resolve each piece from an empty-config ConfigParser first,
# then hand the pieces to PdfConverter. The calls run in the same order as the
# keyword arguments they feed.
config_parser = ConfigParser({})
_marker_config = config_parser.generate_config_dict()
_marker_artifacts = create_model_dict()
_marker_processors = config_parser.get_processors()
_marker_renderer = config_parser.get_renderer()
marker_converter = PdfConverter(
    config=_marker_config,
    artifact_dict=_marker_artifacts,
    processor_list=_marker_processors,
    renderer=_marker_renderer,
)
def convert_document(file, method):
    """Convert an uploaded document to text with the selected backend.

    Args:
        file: The uploaded file. Either an object exposing the on-disk path
            as ``.name`` or a plain path string; ``None`` when nothing was
            uploaded.
        method: Backend name, ``"Docling"`` or ``"Marker"``.

    Returns:
        For ``"Docling"``, the document exported to Markdown. For
        ``"Marker"``, the text element produced by ``text_from_rendered``.
        ``'unknown method'`` for any other ``method`` and
        ``'no file uploaded'`` when ``file`` is ``None``.
    """
    # Reject unknown backends first so that result does not depend on `file`.
    if method not in ("Docling", "Marker"):
        return 'unknown method'

    # Without this guard a missing upload fails on `file.name` below.
    if file is None:
        return 'no file uploaded'

    # Accept both file-like objects carrying `.name` and plain path strings.
    path = getattr(file, "name", file)

    if method == "Docling":
        result = docling_converter.convert(path)
        return result.document.export_to_markdown()

    # Marker: `text_from_rendered` is not imported at module level, so bring
    # it into scope here; otherwise this branch raises NameError.
    from marker.output import text_from_rendered

    rendered = marker_converter(path)
    # Only the text element is returned; the other two are discarded.
    text, _, _ = text_from_rendered(rendered)
    return text
# Build the UI: a file upload, a backend selector, an output textbox and a
# button that runs the conversion.
with gr.Blocks() as app:
    gr.Markdown("# Document Converter")
    gr.Markdown("Upload a document, choose the backend, and get the converted text with metadata.")
    file_input = gr.File(label="Upload Document")
    method_input = gr.Radio(["Docling", "Marker"], label="Choose Conversion Backend")
    output_text = gr.Textbox(label="Converted Document")
    convert_button = gr.Button("Convert")
    # On click, pass the uploaded file and the selected backend to
    # convert_document and route its return value to the textbox.
    convert_button.click(
        convert_document,
        inputs=[file_input, method_input],
        outputs=[output_text],
    )

# Launch the app once the layout is defined. The stray trailing "|" that
# followed this call was removed: it left a dangling binary operator, which is
# a syntax error.
app.launch(debug=True, show_error=True)