# File size: 1,260 Bytes
# c8a32e7
import base64
import io
import os

import gradio as gr
from marker.convert import convert_single_pdf
from marker.models import load_all_models
from marker.settings import Settings
# Load the marker models once, at import time; the resulting list is passed to
# convert_single_pdf on every call of parse_pdf_and_return_markdown.
model_list = load_all_models()
def _image_to_base64_png(image) -> str:
    """Return *image* encoded as PNG and then base64, as ASCII text.

    The image is encoded into an in-memory buffer rather than a file on disk,
    so concurrent requests cannot clobber each other's data and nothing is
    left behind if encoding fails.

    Args:
        image: An object exposing a PIL-style ``save(fp, format)`` method.
    """
    buffer = io.BytesIO()
    image.save(buffer, "PNG")
    return base64.b64encode(buffer.getvalue()).decode("utf-8")


def parse_pdf_and_return_markdown(pdf_file: str, extract_images: bool) -> tuple:
    """Convert an uploaded PDF to markdown and optionally export its images.

    Args:
        pdf_file: Path of the uploaded PDF. The UI declares its file input
            with ``type="filepath"``, so this is a path string rather than
            the raw file bytes.
        extract_images: When true, every image returned by the converter is
            PNG-encoded and included in the result as base64 text.

    Returns:
        A ``(full_text, out_meta, image_data)`` tuple: the markdown text and
        the metadata exactly as returned by ``convert_single_pdf``, plus a
        dict mapping ``"image_1"``, ``"image_2"``, ... to base64-encoded PNG
        data. The dict is empty when ``extract_images`` is false.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if not pdf_file:
        # Submitting the form without a file passes None; fail with a message
        # the UI can display instead of an opaque error from the converter.
        raise gr.Error("Please upload a PDF file before submitting.")

    full_text, images, out_meta = convert_single_pdf(pdf_file, model_list)

    image_data = {}
    if extract_images:
        # NOTE(review): the keys of `images` are discarded and replaced with
        # positional names; presumably they are the filenames referenced from
        # the markdown text - confirm whether consumers need that mapping.
        for index, image in enumerate(images.values(), start=1):
            image_data[f"image_{index}"] = _image_to_base64_png(image)

    return full_text, out_meta, image_data
# Assemble the web UI: a Blocks container with a short instruction line and
# one Interface that connects the upload form to the converter function.
with gr.Blocks() as server:
    gr.Markdown("Upload a PDF file to convert to markdown.")

    # Form controls, in the order of the handler's parameters
    # (pdf_file, extract_images).
    form_inputs = [
        gr.File(label="Upload PDF", type="filepath"),
        gr.Checkbox(label="Extract Images"),
    ]
    # Result widgets, in the order of the handler's returned tuple
    # (full_text, out_meta, image_data).
    result_outputs = [
        gr.Textbox(label="Markdown"),
        gr.JSON(label="Metadata"),
        gr.JSON(label="Images"),
    ]

    gr.Interface(
        fn=parse_pdf_and_return_markdown,
        inputs=form_inputs,
        outputs=result_outputs,
    )
# Start the Gradio server only when this file is run as a script, so that
# importing the module does not launch it.
if __name__ == "__main__":
    server.launch()