File size: 763 Bytes
11551ca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
#!/usr/bin/env python3
import os
import glob
import logging
import torch.multiprocessing as mp
from parallel_multiproc import process_batch_in_parallel
# Configure the root logger at INFO level as soon as the module is imported.
# NOTE(review): the entry point below forces the "spawn" start method, under
# which child processes re-import the main module, so keeping this call at
# module level presumably also configures logging inside the workers --
# confirm before moving it under the __main__ guard.
logging.basicConfig(level=logging.INFO)
def main(
    pdf_dir: str = "/home/user/app/test_pdf",
    output_dir: str = "/home/user/app/pdf_output",
) -> None:
    """Hand every ``*.pdf`` file in ``pdf_dir`` to ``process_batch_in_parallel``.

    Args:
        pdf_dir: Directory searched (non-recursively) for ``*.pdf`` files.
        output_dir: Directory passed on as the batch ``output_dir``; it is
            created first if it does not exist yet.

    Returns:
        None. When no PDF files are found a warning is logged and the batch
        call is skipped entirely.
    """
    os.makedirs(output_dir, exist_ok=True)

    # glob returns matches in arbitrary order; sort so that the batch order is
    # reproducible from run to run.
    pdf_files = sorted(glob.glob(os.path.join(pdf_dir, "*.pdf")))
    logging.info("Found %d PDF files to process", len(pdf_files))

    if not pdf_files:
        # Nothing to do: avoid invoking the parallel batch on an empty list.
        logging.warning("No PDF files found in %s; nothing to process", pdf_dir)
        return

    process_batch_in_parallel(
        pdf_paths=pdf_files,
        output_dir=output_dir,
        num_workers=2,  # tuned for our T4 small instance specifically; do not change
        num_gpus=1,
    )
if __name__ == "__main__":
    # Force the "spawn" start method (force=True overrides any method that was
    # already set) before any worker process is created.
    # NOTE(review): presumably required because CUDA cannot be used in forked
    # subprocesses -- confirm against parallel_multiproc.
    mp.set_start_method("spawn", force=True)
    main()