import concurrent.futures

from extractors.model import (
    LlamaParseModel,
    UnstructuredModel,
    GPTModel,
    ClaudeModel,
    AnyParserModel,
)

# Default number of seconds run_extract_parallel waits for the extractors.
DEFAULT_TIMEOUT = 30

# One shared instance per extractor backend, created at import time.
ap_rt = AnyParserModel()
lp = LlamaParseModel()
un = UnstructuredModel()
gpt = GPTModel()
claude = ClaudeModel()

# Display name -> bound ``run`` method of the matching extractor instance.
model_function_map = {
    "AnyParser": ap_rt.run,
    "LlamaParse": lp.run,
    "Unstructured": un.run,
    "GPT-4o-mini": gpt.run,
    "Claude-3.5-Sonnet": claude.run,
}

# Display names in the insertion order of ``model_function_map``.
models = list(model_function_map)


def run_extract(model, file_path):
    """Run the extractor registered under *model* on *file_path*.

    Args:
        model: A key of ``model_function_map`` (one of ``models``).
        file_path: Passed unchanged to the extractor's ``run`` callable.

    Returns:
        Whatever the extractor's ``run`` returns (presumably markdown
        text -- confirm against ``extractors.model``).

    Raises:
        KeyError: If *model* is not a key of ``model_function_map``.
    """
    print('Running extract: model', model, 'file_path', file_path)
    extractor = model_function_map[model]
    return extractor(file_path)


def _result_or_timeout_message(future, timeout):
    """Return *future*'s result without blocking, or the timeout message."""
    try:
        # timeout=0: callers have already waited; only collect what is ready.
        return future.result(timeout=0)
    except concurrent.futures.TimeoutError:
        return f"Error: Timeout after {timeout} seconds"


def run_extract_parallel(model_a, model_b, pdf, timeout=DEFAULT_TIMEOUT):
    """Run two extractors on the same file concurrently.

    Both extractions share a single *timeout* window, so the call returns
    after at most roughly *timeout* seconds. An extraction that has not
    finished by then is reported as an error string; its worker thread
    cannot be interrupted and keeps running in the background until the
    extractor returns.

    Args:
        model_a: Key of ``model_function_map`` for the first extractor.
        model_b: Key of ``model_function_map`` for the second extractor.
        pdf: File path handed to both extractors.
        timeout: Seconds to wait for both extractors; ``None`` waits
            until both have finished.

    Returns:
        A ``(result_a, result_b)`` tuple. Each element is the extractor's
        return value, or ``"Error: Timeout after {timeout} seconds"`` if
        that extractor did not finish in time.

    Raises:
        Exception: Any non-timeout exception raised inside an extractor
            (including ``KeyError`` for an unknown model name) is re-raised.
    """
    # Deliberately not used as a context manager: leaving a ``with`` block
    # calls shutdown(wait=True), which would block until both extractors
    # finish and make the timeout meaningless.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)
    try:
        future_a = executor.submit(run_extract, model_a, pdf)
        future_b = executor.submit(run_extract, model_b, pdf)

        # One shared wait, so the second extractor does not get a fresh
        # timeout window after the first one has already used it up.
        concurrent.futures.wait((future_a, future_b), timeout=timeout)

        result_a = _result_or_timeout_message(future_a, timeout)
        result_b = _result_or_timeout_message(future_b, timeout)
        return result_a, result_b
    finally:
        # Release the executor without waiting for still-running workers.
        executor.shutdown(wait=False)