import base64
import io
import os

import gradio as gr
from marker.convert import convert_single_pdf
from marker.models import load_all_models
from marker.settings import Settings

# Loaded once at import time and reused for every conversion request.
model_list = load_all_models()


def _image_to_base64_png(image) -> str:
    """Return *image* encoded as PNG and then base64, as a UTF-8 string.

    The image is serialised into an in-memory buffer rather than a file on
    disk, so concurrent requests cannot clobber each other's files and
    nothing is left behind if encoding fails part-way.

    Args:
        image: An image object exposing ``save(fp, format=...)``
            (presumably a ``PIL.Image.Image`` as produced by marker --
            confirm against the installed marker version).
    """
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


def parse_pdf_and_return_markdown(pdf_file: str, extract_images: bool):
    """Convert an uploaded PDF to markdown and optionally return its images.

    Args:
        pdf_file: Path of the uploaded PDF. The UI's file component is
            configured with ``type="filepath"``, so this is a path string
            rather than the file's raw bytes.
        extract_images: When true, every image returned by the converter is
            included in the result as a base64-encoded PNG.

    Returns:
        A ``(full_text, out_meta, image_data)`` tuple: the markdown text, the
        metadata returned by ``convert_single_pdf``, and a dict mapping
        ``"image_1"``, ``"image_2"``, ... to base64 PNG strings (empty when
        ``extract_images`` is false).

    Raises:
        gr.Error: If no file was uploaded.
    """
    if not pdf_file:
        # Surface a readable message in the UI instead of an opaque failure
        # from inside the converter.
        raise gr.Error("Please upload a PDF file.")

    full_text, images, out_meta = convert_single_pdf(pdf_file, model_list)

    image_data = {}
    if extract_images:
        # Keys are 1-based positions in the converter's image mapping; the
        # converter's own filenames are not exposed to the caller.
        image_data = {
            f"image_{index}": _image_to_base64_png(image)
            for index, image in enumerate(images.values(), start=1)
        }

    return full_text, out_meta, image_data


with gr.Blocks() as server:
    gr.Markdown("Upload a PDF file to convert to markdown.")
    gr.Interface(
        parse_pdf_and_return_markdown,
        inputs=[
            gr.File(label="Upload PDF", type="filepath"),
            gr.Checkbox(label="Extract Images"),
        ],
        outputs=[
            gr.Textbox(label="Markdown"),
            gr.JSON(label="Metadata"),
            gr.JSON(label="Images"),
        ],
    )


if __name__ == "__main__":
    server.launch()