Spaces:
Sleeping
Sleeping
import concurrent.futures | |
from extractors.model import LlamaParseModel, UnstructuredModel, GPTModel, ClaudeModel, AnyParserModel | |
# Seconds to wait for an extractor; the default ``timeout`` of
# ``run_extract_parallel``.
DEFAULT_TIMEOUT = 30

# One shared instance per extraction backend, created once at import time.
ap_rt = AnyParserModel()
lp = LlamaParseModel()
un = UnstructuredModel()
gpt = GPTModel()
claude = ClaudeModel()

# Display name -> bound ``run`` method of the matching backend instance.
model_function_map = {
    "AnyParser": ap_rt.run,
    "LlamaParse": lp.run,
    "Unstructured": un.run,
    "GPT-4o-mini": gpt.run,
    "Claude-3.5-Sonnet": claude.run,
}

# Model names in the insertion order of ``model_function_map``.
models = list(model_function_map)
def run_extract(model, file_path):
    """Run the extractor registered under *model* on *file_path*.

    Args:
        model: A key of ``model_function_map``.
        file_path: Passed unchanged to the selected extractor.

    Returns:
        Whatever the selected extractor returns.

    Raises:
        KeyError: If *model* is not a key of ``model_function_map``.
    """
    print('Running extract: model', model, 'file_path', file_path)
    return model_function_map[model](file_path)
def run_extract_parallel(model_a, model_b, pdf, timeout=DEFAULT_TIMEOUT):
    """Run two extractors on the same file concurrently, bounded by *timeout*.

    Both extractions are submitted to a thread pool and share one deadline:
    the call returns after at most *timeout* seconds (or when both finish,
    whichever comes first).

    Args:
        model_a: Name of the first model, as accepted by ``run_extract``.
        model_b: Name of the second model, as accepted by ``run_extract``.
        pdf: File path handed to both extractors.
        timeout: Maximum number of seconds to wait for both results;
            ``None`` waits indefinitely.

    Returns:
        A ``(result_a, result_b)`` tuple. Each element is the extractor's
        return value, or an ``"Error: Timeout after ... seconds"`` string
        if that extractor did not finish before the deadline.

    Raises:
        Exception: Any exception raised inside a finished extractor is
            re-raised here (model A is checked before model B).
    """
    timeout_message = f"Error: Timeout after {timeout} seconds"

    # The executor is deliberately NOT used as a context manager: leaving a
    # ``with`` block calls ``shutdown(wait=True)``, which would block until
    # both extractors finish and make the timeout ineffective.
    executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)
    try:
        future_a = executor.submit(run_extract, model_a, pdf)
        future_b = executor.submit(run_extract, model_b, pdf)

        # A single wait gives both tasks the same deadline; sequential
        # ``result(timeout=...)`` calls would let the second task run for up
        # to twice the requested timeout.
        done, _ = concurrent.futures.wait(
            [future_a, future_b], timeout=timeout
        )

        result_a = future_a.result() if future_a in done else timeout_message
        result_b = future_b.result() if future_b in done else timeout_message
    finally:
        # Running threads cannot be interrupted; a timed-out extractor keeps
        # running in the background, but the caller is no longer blocked.
        executor.shutdown(wait=False)

    return result_a, result_b