"""Gradio app that converts an uploaded PDF to Markdown with marker.

Statement order in this module is significant: the surya batch helpers are
replaced *before* marker is imported, and the models are loaded once at
import time so every request reuses them.
"""

# NOTE(review): `spaces` is deliberately kept as the first import, as in the
# original file; presumably it needs to initialise before GPU-related
# libraries are pulled in -- confirm before reordering.
import spaces
import gradio as gr
import surya.detection as detection
import surya.layout as layout


# Monkey patch to prevent spawning processes: the replacements below run the
# per-image post-processing sequentially in the current process.
def batch_text_detection(images, model, processor, batch_size=None):
    """Run text detection on ``images`` and post-process each page in turn.

    Args:
        images: Sequence of page images forwarded to
            ``surya.detection.batch_detection``.
        model: Detection model forwarded unchanged.
        processor: Matching processor forwarded unchanged.
        batch_size: Optional batch size forwarded unchanged.

    Returns:
        A list with one ``detection.parallel_get_lines`` result per
        prediction, in input order.
    """
    preds, orig_sizes = detection.batch_detection(
        images, model, processor, batch_size=batch_size
    )
    return [
        detection.parallel_get_lines(pred, orig_size)
        for pred, orig_size in zip(preds, orig_sizes)
    ]


detection.batch_text_detection = batch_text_detection


def batch_layout_detection(
    images, model, processor, detection_results=None, batch_size=None
):
    """Run layout detection on ``images`` and post-process each page in turn.

    Args:
        images: Sequence of page images forwarded to
            ``surya.layout.batch_detection``.
        model: Layout model; ``model.config.id2label`` supplies the label map.
        processor: Matching processor forwarded unchanged.
        detection_results: Optional per-image results passed through to
            ``layout.parallel_get_regions``. When falsy, ``None`` is passed
            for every image.
        batch_size: Optional batch size forwarded unchanged.

    Returns:
        A list with one ``layout.parallel_get_regions`` result per
        prediction, in input order.

    Raises:
        IndexError: If ``detection_results`` is non-empty but shorter than
            the number of predictions.
    """
    preds, orig_sizes = layout.batch_detection(
        images, model, processor, batch_size=batch_size
    )
    id2label = model.config.id2label
    return [
        layout.parallel_get_regions(
            pred,
            orig_size,
            id2label,
            # Indexed (not zipped) so a too-short list fails loudly.
            detection_results[i] if detection_results else None,
        )
        for i, (pred, orig_size) in enumerate(zip(preds, orig_sizes))
    ]


layout.batch_layout_detection = batch_layout_detection

# These imports must stay below the patches above so that marker is imported
# only after the surya module attributes have been replaced.
from marker.convert import convert_single_pdf  # noqa: E402
from marker.models import load_all_models  # noqa: E402

# Loaded once at import time and shared by every request.
model_lst = load_all_models()


@spaces.GPU
def convert(file_path):
    """Convert the PDF at ``file_path`` to Markdown text.

    Args:
        file_path: Filesystem path of the uploaded PDF as supplied by the
            Gradio ``File`` input; falsy when nothing was uploaded.

    Returns:
        The first element returned by ``convert_single_pdf`` (the converted
        text shown in the Markdown output).

    Raises:
        gr.Error: If no file was provided.
    """
    print(file_path)
    if not file_path:
        # Surface a readable message in the UI instead of an opaque failure
        # from deep inside the converter.
        raise gr.Error("Please upload a PDF file before submitting.")

    # Only the text is displayed; the second returned value is unused here.
    full_text, _ = convert_single_pdf(
        file_path,
        model_lst,
        max_pages=None,
        langs=None,
        batch_multiplier=16,
    )
    return full_text


gr.Interface(
    convert,
    inputs=gr.File(label="PDF file", type="filepath"),
    outputs=gr.Markdown(label="Markdown"),
).launch()