"""Gradio app that converts an uploaded document to Markdown or JSON.

Two back ends are offered: the Docling ``DocumentConverter`` and the
LlamaIndex ``DoclingReader``. The converted text is shown in the UI and also
written to a file in the system temp directory so it can be downloaded.
"""

import json
import os
import tempfile

import gradio as gr
from docling.document_converter import DocumentConverter
from llama_index.readers.docling import DoclingReader

SUCCESS_MESSAGE = "✅ Document converted successfully!"
FAILURE_MESSAGE = "❌ Error during conversion"


def update_progress(current, total):
    """Return ``current / total`` as a percentage (0.0 when ``total`` is 0)."""
    if not total:
        # Avoid ZeroDivisionError when there is nothing to process.
        return 0.0
    return (current / total) * 100


def _source_path(file):
    """Return the filesystem path of the uploaded file.

    Accepts both an object exposing ``.name`` (what the original code
    assumed) and a plain path string.

    Raises:
        ValueError: if no file was uploaded.
    """
    if file is None:
        raise ValueError("No file was uploaded.")
    return getattr(file, "name", file)


def _write_output(source_path, converted_text, extension):
    """Write ``converted_text`` to the temp directory and return the new path.

    The output file reuses the source file's base name with ``extension``.
    """
    stem = os.path.splitext(os.path.basename(source_path))[0]
    output_path = os.path.join(tempfile.gettempdir(), stem + extension)
    with open(output_path, "w", encoding="utf-8") as handle:
        handle.write(converted_text)
    return output_path


def _success(source_path, converted_text, output_path, output_format, method,
             progress):
    """Build the 6-tuple returned by a converter after a successful run."""
    metadata = {
        "Filename": source_path,
        "File Size": f"{os.path.getsize(source_path) / 1024:.2f} KB",
        "Output Format": output_format,
        "Conversion Status": "Success",
        "Method": method,
    }
    # Only invoke ``progress`` when it really is a callback. The original
    # code passed a Slider component here and called it unconditionally.
    if callable(progress):
        progress(100)
    return (
        converted_text,
        metadata,
        output_path,
        gr.update(visible=True),
        SUCCESS_MESSAGE,
        100,
    )


def _failure(error):
    """Build the 6-tuple returned by a converter when ``error`` was raised."""
    return (
        "",
        {"Error": str(error), "Status": "Failed"},
        None,
        gr.update(visible=False),
        FAILURE_MESSAGE,
        0,
    )


def convert_document_docling(file, output_format, progress=None):
    """Convert ``file`` with the LlamaIndex ``DoclingReader``.

    Args:
        file: Uploaded file (object with ``.name`` or a path string).
        output_format: ``"JSON"`` selects JSON export; any other value
            selects Markdown.
        progress: Optional callable invoked with ``100`` on success.
            Non-callable values are ignored.

    Returns:
        A 6-tuple ``(text, metadata, output_path, visibility_update,
        status_message, progress_percent)``. On failure ``text`` is empty,
        ``output_path`` is ``None`` and ``metadata`` carries the error text.
    """
    try:
        source_path = _source_path(file)
        as_json = output_format == "JSON"
        export_type = (
            DoclingReader.ExportType.JSON
            if as_json
            else DoclingReader.ExportType.MARKDOWN
        )
        docs = DoclingReader(export_type=export_type).load_data(
            file_path=source_path
        )
        converted_text = docs[0].text
        output_path = _write_output(
            source_path, converted_text, ".json" if as_json else ".md"
        )
        return _success(
            source_path,
            converted_text,
            output_path,
            output_format,
            "llama-index-readers-docling",
            progress,
        )
    except Exception as exc:  # UI boundary: report the error, don't crash.
        return _failure(exc)


def convert_document_original(file, output_format, progress=None):
    """Convert ``file`` with Docling's ``DocumentConverter``.

    Args:
        file: Uploaded file (object with ``.name`` or a path string).
        output_format: ``"Markdown"`` selects Markdown export; any other
            value selects JSON.
        progress: Optional callable invoked with ``100`` on success.
            Non-callable values are ignored.

    Returns:
        A 6-tuple ``(text, metadata, output_path, visibility_update,
        status_message, progress_percent)``. On failure ``text`` is empty,
        ``output_path`` is ``None`` and ``metadata`` carries the error text.
    """
    try:
        source_path = _source_path(file)
        result = DocumentConverter().convert(source_path)
        if output_format == "Markdown":
            converted_text = result.document.export_to_markdown()
            extension = ".md"
        else:
            # Serialise the dict export ourselves so the result is a string
            # that can be shown in the textbox and written to disk.
            converted_text = json.dumps(
                result.document.export_to_dict(),
                ensure_ascii=False,
                indent=2,
            )
            extension = ".json"
        output_path = _write_output(source_path, converted_text, extension)
        return _success(
            source_path,
            converted_text,
            output_path,
            output_format,
            "docling",
            progress,
        )
    except Exception as exc:  # UI boundary: report the error, don't crash.
        return _failure(exc)


def run_conversion(file, output_format, method):
    """Click handler: run the selected converter and map it onto the UI.

    Returns one value per output component: converted text, metadata, a
    single update for the download file (value + visibility), an update that
    reveals the status box with its message, and the progress percentage.
    """
    if method == "llama-index-readers-docling":
        converter = convert_document_docling
    else:
        converter = convert_document_original
    text, metadata, output_path, _visibility, status, percent = converter(
        file, output_format
    )
    return (
        text,
        metadata,
        gr.update(value=output_path, visible=output_path is not None),
        # The status box starts hidden; reveal it once there is a message.
        gr.update(value=status, visible=True),
        percent,
    )


custom_css = """
:root {
    --primary-color: #2563eb;
    --secondary-color: #1e40af;
    --background-color: #1e1e1e;
    --card-background: #262626;
    --text-color: #ffffff;
    --border-radius: 10px;
}
body {
    background-color: var(--background-color);
    color: var(--text-color);
}
.container {
    max-width: 1200px;
    margin: 0 auto;
    padding: 2rem;
}
.gr-button {
    background: var(--primary-color) !important;
    border: none !important;
    color: white !important;
    padding: 10px 20px !important;
    border-radius: var(--border-radius) !important;
    transition: all 0.3s ease !important;
}
.gr-button:hover {
    background: var(--secondary-color) !important;
    transform: translateY(-2px);
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.gr-form {
    background-color: var(--card-background);
    padding: 2rem;
    border-radius: var(--border-radius);
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
}
.gr-input, .gr-textbox {
    background-color: #333333 !important;
    border: 1px solid #404040 !important;
    color: var(--text-color) !important;
    border-radius: var(--border-radius) !important;
}
.gr-padded {
    padding: 1rem;
}
.gr-header {
    margin-bottom: 2rem;
    text-align: center;
}
.gr-subtitle {
    color: #9ca3af;
    font-size: 1.1rem;
    margin-bottom: 1.5rem;
}
"""

with gr.Blocks(css=custom_css) as app:
    # NOTE(review): the header markup was reconstructed around the original
    # text using the .gr-header / .gr-subtitle classes that the stylesheet
    # defines but nothing else references - confirm against the design.
    gr.HTML(
        """
        <div class="gr-header">
            <h1>📄 Docling Document Converter</h1>
            <p class="gr-subtitle">Transform your documents into Markdown or JSON format with ease</p>
        </div>
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            with gr.Group():
                gr.Markdown("### Input Settings")
                file_input = gr.File(
                    label="Upload Your Document",
                    file_types=[".doc", ".docx", ".pdf", ".txt"],
                    elem_classes="gr-input",
                )
                format_input = gr.Radio(
                    choices=["Markdown", "JSON"],
                    label="Output Format",
                    value="Markdown",
                    elem_classes="gr-input",
                )
                method_input = gr.Radio(
                    choices=["docling", "llama-index-readers-docling"],
                    label="Conversion Method",
                    value="docling",
                    elem_classes="gr-input",
                )
                convert_button = gr.Button(
                    "🔄 Convert Document",
                    variant="primary",
                    elem_classes=["gr-button"],
                )
                status_message = gr.Textbox(
                    label="Status",
                    interactive=False,
                    visible=False,
                    elem_classes="gr-padded",
                )

        with gr.Column(scale=2):
            with gr.Group():
                gr.Markdown("### Conversion Output")
                output_text = gr.Textbox(
                    label="Converted Content",
                    placeholder="The converted text will appear here...",
                    lines=15,
                    elem_classes="gr-textbox",
                )
                output_metadata = gr.JSON(
                    label="Document Metadata",
                    elem_classes="gr-input",
                )
                download_button = gr.File(
                    label="Download Converted File",
                    visible=False,
                    elem_classes="gr-padded",
                )

    with gr.Row():
        progress = gr.Slider(
            minimum=0,
            maximum=100,
            value=0,
            label="Progress",
            interactive=False,
        )

    # Each output component appears once; run_conversion returns one value
    # per entry, in this order.
    convert_button.click(
        fn=run_conversion,
        inputs=[file_input, format_input, method_input],
        outputs=[
            output_text,
            output_metadata,
            download_button,
            status_message,
            progress,
        ],
    )

app.launch(debug=True, share=True)