Spaces:

huggingchat
/

pdf-to-markdown

Runtime error

pdf-to-markdown / app.py

Liam Dyer

temp

9bdea1a unverified about 1 year ago

1.05 kB

	import spaces
	import gradio as gr
	from marker.convert import convert_single_pdf
	from marker.models import load_all_models
	import surya.detection as detection

	# Build the model list once at import time; convert() hands this same list
	# to every conversion call instead of loading models per request.
	model_lst = load_all_models()


	# Monkey patch to prevent spawning processes
	def batch_text_detection(images, model, processor, batch_size=None):
	    """Run text detection on a batch of images, one image at a time.

	    Calls ``detection.batch_detection`` once for the whole batch, then
	    calls ``detection.parallel_get_lines`` once per image, sequentially
	    and in input order.

	    Args:
	        images: Images to process; its length sets how many results
	            are produced.
	        model: Forwarded unchanged to ``detection.batch_detection``.
	        processor: Forwarded unchanged to ``detection.batch_detection``.
	        batch_size: Forwarded as the ``batch_size`` keyword argument.

	    Returns:
	        A list with one ``parallel_get_lines`` result per input image.
	    """
	    predictions, original_sizes = detection.batch_detection(
	        images, model, processor, batch_size=batch_size
	    )
	    # Index by position in ``images`` so exactly len(images) results come back.
	    return [
	        detection.parallel_get_lines(predictions[idx], original_sizes[idx])
	        for idx in range(len(images))
	    ]


	# Rebind the attribute on the surya.detection module so lookups that go
	# through the module (detection.batch_text_detection) get the replacement
	# defined in this file.
	# NOTE(review): code that already did
	# `from surya.detection import batch_text_detection` before this line holds
	# its own reference and is not affected by this rebinding — confirm the
	# callers that matter resolve the name through the module.
	detection.batch_text_detection = batch_text_detection


	@spaces.GPU
	def convert(file_path):
	    """Convert an uploaded PDF into Markdown text.

	    Args:
	        file_path: Path of the uploaded PDF as supplied by the file input.
	            Empty (``None``) when the form is submitted without a file.

	    Returns:
	        The first value returned by ``convert_single_pdf``, which is the
	        converted text shown in the Markdown output.

	    Raises:
	        gr.Error: If no file was provided.
	    """
	    print(file_path)

	    if not file_path:
	        # Fail early with a message the UI can display, rather than letting
	        # an empty path reach the converter and surface as an opaque error.
	        raise gr.Error("Please upload a PDF file before converting.")

	    # ``model_lst`` is only read here, so no ``global`` declaration is needed.
	    # Only the text is used; star-unpacking discards whatever else is
	    # returned alongside it without binding unused names.
	    full_text, *_ = convert_single_pdf(
	        file_path,
	        model_lst,
	        max_pages=None,
	        langs=None,
	        batch_multiplier=16,
	    )

	    return full_text


	# Wire convert() to a single-file input and a Markdown output, then start
	# serving the UI.
	demo = gr.Interface(
	    fn=convert,
	    inputs=gr.File(label="PDF file", type="filepath"),
	    outputs=gr.Markdown(label="Markdown"),
	)
	demo.launch()