# Hugging Face Space (status banner at capture time: "Running on Zero")
# NOTE(review): `spaces` is not referenced by name in the visible code; it is
# presumably imported for its side effects on a ZeroGPU Space. Keep it as the
# first import and confirm.
import spaces
import gradio as gr
from marker.convert import convert_single_pdf
from marker.models import load_all_models
import surya.detection as detection

# Load the marker models once at import time; convert() passes this same list
# to convert_single_pdf on every request.
model_lst = load_all_models()
# Monkey patch: replace surya's batch_text_detection to prevent spawning processes.
def batch_text_detection(images, model, processor, batch_size=None):
    """Run text detection on a batch and post-process each result in a loop.

    Calls ``detection.batch_detection`` once for the whole batch, then feeds
    every (prediction, original size) pair to
    ``detection.parallel_get_lines`` one after another in a plain sequential
    loop. Per the comment in this file, the point of this replacement is to
    avoid spawning processes.

    Args:
        images: Batch of images, forwarded unchanged to
            ``detection.batch_detection``.
        model: Detection model, forwarded unchanged.
        processor: Processor that accompanies ``model``, forwarded unchanged.
        batch_size: Optional batch size, forwarded by keyword.

    Returns:
        A list of ``detection.parallel_get_lines`` results, in the order the
        predictions were returned.
    """
    preds, orig_sizes = detection.batch_detection(
        images, model, processor, batch_size=batch_size
    )
    # Pair each prediction with its original size directly instead of
    # indexing both lists by position.
    # NOTE(review): assumes batch_detection yields exactly one prediction and
    # one size per input image -- confirm against the installed surya version.
    return [
        detection.parallel_get_lines(pred, orig_size)
        for pred, orig_size in zip(preds, orig_sizes)
    ]
# Install the replacement on the surya.detection module.
# NOTE(review): this rebinding is only seen by code that looks the function up
# as an attribute of `surya.detection` at call time. Any module that already
# executed `from surya.detection import batch_text_detection` keeps its own
# reference to the original -- confirm the patch reaches marker's call site.
detection.batch_text_detection = batch_text_detection
def convert(file_path):
    """Convert an uploaded PDF into Markdown text.

    Args:
        file_path: Path of the uploaded PDF as delivered by the Gradio file
            input, or ``None`` when the form was submitted without a file.

    Returns:
        The first element returned by ``convert_single_pdf`` (the full text).
        The second element (images) is discarded.

    Raises:
        gr.Error: If no file was provided.
    """
    # NOTE(review): `spaces` is imported at the top of this file, but no
    # `@spaces.GPU` decorator is visible on this handler -- confirm whether
    # GPU allocation is expected for this Space.
    print(file_path)

    # Fail with a readable UI message rather than an opaque error from deep
    # inside the PDF loader when nothing was uploaded.
    if file_path is None:
        raise gr.Error("Please upload a PDF file first.")

    # model_lst is only read here, so no `global` declaration is needed.
    full_text, _images = convert_single_pdf(
        file_path,
        model_lst,
        max_pages=None,
        langs=None,
        batch_multiplier=16,
    )
    return full_text
# Build the web UI: a single PDF upload as input, Markdown as output,
# with convert() as the handler, then start serving it.
pdf_input = gr.File(label="PDF file", type="filepath")
markdown_output = gr.Markdown(label="Markdown")
demo = gr.Interface(convert, inputs=pdf_input, outputs=markdown_output)
demo.launch()