Spaces:
Running
Running
File size: 1,961 Bytes
e2d728a 1ba64f5 0884aec 8e024f6 1ba64f5 ba47b56 b562a6c e2d728a ba47b56 0884aec ba47b56 cfdee1a 8604d96 ba47b56 8604d96 266215f e2d728a 4a9e7b8 e2d728a a07d796 8604d96 e2d728a a07d796 8604d96 a07d796 8604d96 4a9e7b8 a07d796 e2d728a e665966 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import gradio as gr
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.pipeline_options import (
AcceleratorDevice,
PdfPipelineOptions,
AcceleratorOptions
)
import spaces
from docling.datamodel.base_models import InputFormat
from marker.converters.pdf import PdfConverter
from marker.models import create_model_dict
from marker.output import text_from_rendered
# Docling backend: PDF pipeline pinned to the CPU with 8 worker threads.
accelerator_options = AcceleratorOptions(
    device=AcceleratorDevice.CPU,
    num_threads=8,
)

# Start from the default PDF pipeline options, then switch on OCR, table
# structure and table cell matching, and attach the accelerator settings.
pipeline_options = PdfPipelineOptions()
pipeline_options.do_ocr = True
pipeline_options.do_table_structure = True
pipeline_options.table_structure_options.do_cell_matching = True
pipeline_options.accelerator_options = accelerator_options

# Only the PDF input format receives the custom pipeline options.
docling_converter = DocumentConverter(
    format_options={
        InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
    },
)
# Marker backend: the artifact dictionary is created once, at import time,
# and handed to the PDF converter that convert_document reuses on every call.
marker_converter = PdfConverter(artifact_dict=create_model_dict())
def convert_document(file, method):
    """Convert an uploaded document to text with the selected backend.

    Args:
        file: The uploaded document. Either an object exposing its on-disk
            path as ``.name`` or a plain path string; ``None`` when no
            document was supplied.
        method: Backend name, ``"Docling"`` or ``"Marker"``.

    Returns:
        The converted text: Docling's Markdown export, or the text element
        of Marker's rendered output. Returns ``'unknown method'`` for any
        other ``method`` value, and a short prompt when ``file`` is ``None``.
    """
    if method not in ("Docling", "Marker"):
        return 'unknown method'
    if file is None:
        # Without this guard a missing upload fails on ``None.name``.
        return "Please upload a document first."

    # Accept both file-like uploads (path in ``.name``) and bare path strings.
    path = getattr(file, "name", file)

    if method == "Docling":
        result = docling_converter.convert(path)
        return result.document.export_to_markdown()

    rendered = marker_converter(path)
    # text_from_rendered yields three values; only the text is returned.
    text, _, _ = text_from_rendered(rendered)
    return text
# Gradio UI: a file upload, a backend selector, a result text box and a
# button that ties them together.
with gr.Blocks() as app:
    gr.Markdown("# Document Converter")
    gr.Markdown("Upload a document, choose the backend, and get the converted text with metadata.")
    file_input = gr.File(label="Upload Document")
    method_input = gr.Radio(["Docling", "Marker"], label="Choose Conversion Backend")
    output_text = gr.Textbox(label="Converted Document")
    convert_button = gr.Button("Convert")
    # On click, call convert_document(file, method) and show its return
    # value in the text box.
    convert_button.click(
        convert_document,
        inputs=[file_input, method_input],
        outputs=[output_text],
    )

# Start the server only when run as a script, so importing this module
# (e.g. to reuse `app` or `convert_document`) does not block on launch.
if __name__ == "__main__":
    app.launch(debug=True, show_error=True)