|
import abc_1
|
|
import time
|
|
import sys
|
|
from docx import Document
|
|
from pdfminer.high_level import extract_text
|
|
import json
|
|
|
|
def load_categories(argv):
    """Return the category-to-keywords mapping to use for this run.

    If ``argv`` carries an argument after the program name, that argument is
    decoded as JSON and the decoded value is returned. Otherwise
    "No data provided." is printed and a built-in sample mapping is returned.

    Args:
        argv: Argument list laid out like ``sys.argv`` (program name first,
            optional JSON payload second).

    Returns:
        The decoded JSON value, or a freshly built default mapping of
        category name to list of keyword strings.

    Raises:
        SystemExit: If the supplied argument is not valid JSON.
    """
    if len(argv) > 1:
        try:
            return json.loads(argv[1])
        except json.JSONDecodeError as err:
            # Exit with a readable message instead of a raw traceback.
            raise SystemExit(
                f"Invalid JSON for categories/keywords: {err}"
            ) from err

    print("No data provided.")
    # Built fresh on every call so callers can never mutate a shared default.
    return {
        'AI': ['Artificial', 'Intelligence'],
        'Automata': ['finite', 'state', 'machines'],
        'DT': ['game', 'theory'],
    }


def main():
    """Compile the keywords, run the categorizer and print the elapsed time."""
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments.
    start = time.perf_counter()

    # Previously the parsed command-line JSON was discarded and the hard-coded
    # sample mapping was always used; the supplied data now takes effect.
    categories_keywords = load_categories(sys.argv)

    # NOTE(review): presumably input/output locations consumed by abc_1 --
    # confirm against abc_1.multi_process_categorizer. Renamed from
    # ``input``/``output`` to stop shadowing the ``input`` builtin.
    input_path = 'input'
    output_path = 'output'

    compiled_keywords = abc_1.compile_keywords(categories_keywords)
    abc_1.multi_process_categorizer(
        input_path, output_path, compiled_keywords, num_processes=8
    )

    elapsed = time.perf_counter() - start
    print(f"Categorization completed in {elapsed:.2f} seconds")


if __name__ == '__main__':
    main()