Spaces:

Ritvik19
/

marker-io

Runtime error

App Files Files Community

marker-io / server.py

Ritvik19

Add all files and directories

c8a32e7 about 1 year ago

raw

history blame

2.58 kB

	import base64
	import io
	import os

	import gradio as gr
	from marker.convert import convert_single_pdf
	from marker.models import load_all_models
	from marker.settings import Settings


	# Load the marker models once at import time; the resulting list is passed to
	# convert_single_pdf on every conversion call instead of being reloaded.
	model_list = load_all_models()

	def parse_pdf_and_return_markdown(pdf_file: bytes, extract_images: bool):
	    """Convert a PDF to markdown and optionally return its images as base64.

	    Args:
	        pdf_file: The PDF input, forwarded unchanged to ``convert_single_pdf``.
	            NOTE(review): annotated as ``bytes``, but the Gradio ``File``
	            input in this module is configured with ``type="filepath"`` --
	            confirm which form ``convert_single_pdf`` expects.
	        extract_images: When true, every image returned by the converter is
	            PNG-encoded and included in the result; otherwise the image
	            mapping is returned empty.

	    Returns:
	        A ``(full_text, out_meta, image_data)`` tuple, where ``image_data``
	        maps ``"image_1"``, ``"image_2"``, ... (in the converter's iteration
	        order) to base64-encoded PNG data as ``str``.
	    """
	    full_text, images, out_meta = convert_single_pdf(pdf_file, model_list)
	    image_data = {}
	    if extract_images:
	        for index, image in enumerate(images.values(), start=1):
	            # Encode in memory rather than round-tripping through a
	            # fixed-name file in the working directory: concurrent requests
	            # would overwrite each other's "image_N.png", and a failure
	            # between save and remove would leave the file behind.
	            with io.BytesIO() as buffer:
	                image.save(buffer, "PNG")
	                png_bytes = buffer.getvalue()
	            image_data[f"image_{index}"] = base64.b64encode(png_bytes).decode("utf-8")

	    return full_text, out_meta, image_data


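	# NOTE: The commented-out code below appears to be an earlier FastAPI/uvicorn
	# version of this server, kept for reference; the Gradio app further down is
	# the active entry point.
	# @app.post("/convert")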
	# async def convert_pdf_to_markdown(pdf_file: UploadFile = File(...), extract_images: bool = True):
	# if extract_images == False:
	# Settings.EXTRACT_IMAGES = False
	# print("Print EXTRACT_IMAGES set to False")
	# else:
	# Settings.EXTRACT_IMAGES = True
	# if pdf_file.content_type != "application/pdf":
	# raise HTTPException(
	# status_code=status.HTTP_415_UNSUPPORTED_MEDIA_TYPE,
	# detail=f'File {pdf_file.filename} has unsupported extension type',
	# )
	# markdown_text, metadata, image_data = parse_pdf_and_return_markdown(await pdf_file.read(), extract_images=extract_images)
	# return {"markdown": markdown_text, "metadata": metadata, "images": image_data }

	# def main():
	# # Parse command-line arguments
	# parser = argparse.ArgumentParser(description="Run the marker-api server.")
	# parser.add_argument("--host", default="127.0.0.1", help="Host IP address")
	# parser.add_argument("--port", type=int, default=8000, help="Port number")
	# args = parser.parse_args()

	# # Load all models before starting the server
	# configure_logging() # Assuming this function initializes logging

	# # Start the server
	# import uvicorn
	# uvicorn.run(app, host=args.host, port=args.port)

	# Gradio front end: one form that feeds the uploaded PDF and the
	# "Extract Images" checkbox to parse_pdf_and_return_markdown and shows the
	# three values it returns.
	with gr.Blocks() as server:
	    gr.Markdown("Upload a PDF file to convert to markdown.")
	    gr.Interface(
	        fn=parse_pdf_and_return_markdown,
	        inputs=[
	            gr.File(label="Upload PDF", type="filepath"),
	            gr.Checkbox(label="Extract Images"),
	        ],
	        outputs=[
	            gr.Textbox(label="Markdown"),
	            gr.JSON(label="Metadata"),
	            gr.JSON(label="Images"),
	        ],
	    )


	if __name__ == "__main__":
	    # Start the web app only when executed as a script, not on import.
	    server.launch()