# marker-io / server.py
# Ritvik19's picture
# Update server.py
# 3cb2b29 verified
# raw
# history blame
# 1.24 kB
import base64
import io
import os

import gradio as gr
from marker.convert import convert_single_pdf
from marker.models import load_all_models
from marker.settings import Settings
# Load the marker models once at import time; the resulting object is passed
# to convert_single_pdf on every request instead of being reloaded per call.
model_list = load_all_models()
def parse_pdf_and_return_markdown(pdf_file: str, extract_images: bool):
    """Convert a PDF to markdown and optionally return its images.

    Args:
        pdf_file: The PDF to convert, forwarded unchanged to
            ``convert_single_pdf``. The UI in this file supplies it from a
            ``gr.File(type="filepath")`` input, so it is expected to be a
            filesystem path rather than raw bytes.
        extract_images: When true, every image produced by the conversion is
            returned as a base64-encoded PNG; when false the image mapping
            is left empty.

    Returns:
        A ``(full_text, out_meta, image_data)`` tuple where ``full_text`` and
        ``out_meta`` are the markdown text and metadata returned by
        ``convert_single_pdf``, and ``image_data`` maps ``"image_1"``,
        ``"image_2"``, ... to base64 strings (UTF-8 decoded) of the PNG
        bytes, in the iteration order of the converter's image mapping.
    """
    full_text, images, out_meta = convert_single_pdf(pdf_file, model_list)

    image_data = {}
    if extract_images:
        # Assumes each value exposes a PIL-style ``save(fp, format)`` that
        # accepts a file object (the original code called ``save`` with a
        # filename) -- TODO confirm against the marker version in use.
        for index, image in enumerate(images.values(), start=1):
            # Encode in memory: avoids writing temp files into the working
            # directory, which could collide between concurrent requests and
            # would be left behind if encoding raised midway.
            buffer = io.BytesIO()
            image.save(buffer, "PNG")
            encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
            image_data[f"image_{index}"] = encoded

    return full_text, out_meta, image_data
# Assemble the web UI: a short instruction line followed by an Interface that
# feeds the uploaded PDF and the "Extract Images" flag to
# parse_pdf_and_return_markdown and shows its three results.
with gr.Blocks() as server:
    gr.Markdown("Upload a PDF file to convert to markdown.")
    gr.Interface(
        fn=parse_pdf_and_return_markdown,
        inputs=[
            gr.File(label="Upload PDF", type="filepath"),
            gr.Checkbox(label="Extract Images"),
        ],
        outputs=[
            gr.Textbox(label="Markdown"),
            gr.JSON(label="Metadata"),
            gr.JSON(label="Images"),
        ],
    )

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    server.launch()