Spaces:
Sleeping
Sleeping
import concurrent.futures | |
from extractors.model import AnyParserModel, LlamaParseModel, UnstructuredModel, GPTModel, ClaudeModel | |
# One shared instance per extraction backend, created once at import time.
ap_rt = AnyParserModel()
lp = LlamaParseModel()
un = UnstructuredModel()
gpt = GPTModel()
claude = ClaudeModel()

# Maps a model's display name to the bound ``extract`` method that handles it.
model_function_map = {
    "AnyParser": ap_rt.extract,
    "LlamaParse": lp.extract,
    "Unstructured": un.extract,
    "GPT-4o-mini": gpt.extract,
    "Claude-3.5-Sonnet": claude.extract,
}

# Display names of every registered model, in registration order.
models = list(model_function_map)
def run_extract(model, file_path):
    """Run the extractor registered under *model* on *file_path*.

    Args:
        model: Key into ``model_function_map`` selecting the extractor.
        file_path: Value passed unchanged to the selected extractor.

    Returns:
        Whatever the selected extractor returns (presumably Markdown text;
        confirm against the extractor implementations).

    Raises:
        KeyError: If *model* is not a key of ``model_function_map``.
    """
    # Log first so the attempt is visible even when the lookup below fails.
    print('Running extract: model', model, 'file_path', file_path)
    extract_fn = model_function_map[model]
    return extract_fn(file_path)
def run_extract_parallel(model_a, model_b, pdf):
    """Run two extractors on the same input concurrently.

    Args:
        model_a: Model name passed to ``run_extract`` for the first task.
        model_b: Model name passed to ``run_extract`` for the second task.
        pdf: Input passed to ``run_extract`` for both tasks.

    Returns:
        A 2-tuple ``(result_a, result_b)`` in argument order, regardless of
        which task finishes first.

    Raises:
        Any exception raised by a task is re-raised here when its result is
        collected; the first task's result is collected first.
    """
    with concurrent.futures.ThreadPoolExecutor() as pool:
        # Queue both tasks before waiting on either so they can overlap.
        pending = [
            pool.submit(run_extract, name, pdf)
            for name in (model_a, model_b)
        ]
        # Block on each future in submission order.
        first, second = (task.result() for task in pending)
    return first, second