# Spaces: Sleeping
import os
import tempfile

import gradio as gr
from marker.pdf import PDF  # Updated import path
# Maps each dropdown label to (file extension, name of the PDF method that
# renders that format). Keeping the extension here avoids deriving it from the
# label text, which produced "output.markdown" instead of "output.md".
_OUTPUT_FORMATS = {
    "Markdown (.md)": ("md", "to_markdown"),
    "HTML (.html)": ("html", "to_html"),
    "JSON (.json)": ("json", "to_json"),
}


def convert_pdf(input_file, output_format):
    """Convert a PDF file to the specified format.

    Args:
        input_file: Uploaded PDF file. Either a path string or an object
            exposing the path through a ``name`` attribute.
        output_format: Desired output format; one of the labels
            "Markdown (.md)", "HTML (.html)" or "JSON (.json)".

    Returns:
        Path to the converted file. The file is named ``output.<ext>`` and is
        written into a freshly created temporary directory, so overlapping
        requests never overwrite each other's results.

    Raises:
        ValueError: If ``output_format`` is not one of the supported labels,
            or if no input file was provided.
    """
    # Validate the format first so a bad request fails before the (potentially
    # expensive) PDF parsing and before anything is written to disk.
    try:
        extension, renderer_name = _OUTPUT_FORMATS[output_format]
    except (KeyError, TypeError):
        # TypeError covers unhashable values; None simply misses the lookup.
        raise ValueError("Unsupported output format!") from None

    if input_file is None:
        raise ValueError("No PDF file was uploaded.")

    # Accept both upload wrappers (path in ``.name``) and plain path strings.
    source_path = getattr(input_file, "name", input_file)

    pdf = PDF(source_path)  # Initialize the PDF object
    # NOTE(review): assumes the renderer returns text (str), as the original
    # code passed its result straight to ``f.write`` -- confirm for to_json().
    rendered = getattr(pdf, renderer_name)()

    # Create the directory only after rendering succeeded, so a failed
    # conversion leaves nothing behind. The directory is not removed here
    # because the caller still has to read the returned file.
    output_dir = tempfile.mkdtemp(prefix="pdf_convert_")
    output_file_path = os.path.join(output_dir, f"output.{extension}")
    # Explicit UTF-8: extracted PDF text routinely contains non-ASCII
    # characters that a platform-default encoding may not be able to encode.
    with open(output_file_path, "w", encoding="utf-8") as f:
        f.write(rendered)
    return output_file_path
# UI wiring. Components are taken from the top-level ``gr`` namespace: the
# legacy ``gr.inputs`` / ``gr.outputs`` modules are deprecated and are no
# longer available in current Gradio releases.
output_format_dropdown = gr.Dropdown(
    choices=["Markdown (.md)", "HTML (.html)", "JSON (.json)"],
    label="Select Output File Format",
)

# No explicit ``type=`` is passed: ``type="file"` is not accepted by current
# Gradio releases, so the component's default upload representation is used.
file_input = gr.File(label="Upload PDF File")

# Used as an output, the File component offers the returned path for download.
output_file = gr.File(label="Download Converted File")

gr_interface = gr.Interface(
    fn=convert_pdf,
    inputs=[file_input, output_format_dropdown],
    outputs=output_file,
    title="PDF Converter",
    description="Upload a PDF file and select the desired output format (Markdown, HTML, or JSON).",
)

# Start the web server when the script is executed.
gr_interface.launch()