"""Command-line driver that runs the ``abc_1`` categorizer over a directory.

Usage::

    python <this_script> ['<json mapping of category -> list of keywords>']

When a JSON mapping is passed as the first argument it is used as the
category/keyword table; otherwise the built-in defaults below are used.
"""

import json
import sys
import time

from docx import Document  # Assuming DOCX support is desired
from pdfminer.high_level import extract_text  # Import for PDF text extraction

import abc_1

# Fallback category -> keywords table, used when no JSON argument is supplied.
DEFAULT_CATEGORIES_KEYWORDS = {
    'AI': ['Artificial', 'Intelligence'],
    'Automata': ['finite', 'state', 'machines'],
    'DT': ['game', 'theory'],
}

INPUT_PATH = 'input'  # file path here
OUTPUT_PATH = 'output'  # and here

# Number of worker processes handed to the categorizer; adjust as needed.
NUM_PROCESSES = 8


def _load_categories(argv):
    """Return the category/keyword mapping selected by the command line.

    Args:
        argv: The full argument vector (``sys.argv``); ``argv[1]``, when
            present, is expected to be a JSON document.

    Returns:
        The object decoded from ``argv[1]``, or ``DEFAULT_CATEGORIES_KEYWORDS``
        when no argument was given.

    Raises:
        SystemExit: If ``argv[1]`` is present but is not valid JSON.
    """
    if len(argv) <= 1:
        print("No data provided.")
        return DEFAULT_CATEGORIES_KEYWORDS

    try:
        # NOTE(review): presumably a {category: [keyword, ...]} object, the
        # same shape as the defaults -- confirm against the caller.
        return json.loads(argv[1])
    except json.JSONDecodeError as err:
        sys.exit(f"Invalid JSON for categories/keywords: {err}")


def main():
    """Compile the keyword table, run the categorizer and report elapsed time."""
    start = time.perf_counter()

    # Previously the parsed command-line mapping was discarded and the
    # hard-coded table was always used; honour the caller's data instead.
    categories_keywords = _load_categories(sys.argv)

    compiled_keywords = abc_1.compile_keywords(categories_keywords)
    abc_1.multi_process_categorizer(
        INPUT_PATH,
        OUTPUT_PATH,
        compiled_keywords,
        num_processes=NUM_PROCESSES,
    )

    elapsed = time.perf_counter() - start
    print(f"Categorization completed in {elapsed:.2f} seconds")


# The guard keeps worker processes that re-import this module from
# re-running the whole job.
if __name__ == '__main__':
    main()