Spaces:
Build error
Build error
File size: 1,740 Bytes
22738ca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# -*- coding: utf-8 -*-
from App.bin import constants
from App.bin.InputHandler import InputHandler
from App.bin.PatentHandler import PatentHandler
from App.bin.CorpusProcessor import CorpusProcessor
import time
# Wall-clock timestamp (seconds since the epoch) captured once, when this
# module is first loaded.
start_time = time.time()
def _prompt_non_empty(prompt):
    """Ask the user *prompt* repeatedly until a non-blank answer is given.

    Surrounding whitespace is stripped, so an answer made only of spaces
    counts as empty and triggers a retry instead of being accepted.

    Args:
        prompt: Text displayed to the user by ``input``.

    Returns:
        The stripped, non-empty answer.
    """
    while True:
        answer = input(prompt).strip()
        if answer:
            return answer
        print("We didn't understand you.")


def main():
    """Prompt for a corpus folder and file extension, then run the pipeline.

    The folder name is appended to ``constants.DATA_INPUT`` and the extension
    is turned into a ``*.<ext>`` pattern. Both are handed to ``InputHandler``;
    its output goes through ``PatentHandler.pretreat_data`` and then
    ``CorpusProcessor.process_corpus``. The elapsed processing time is
    printed at the end.
    """
    print("Starting process!")

    # Ask for the corpus folder name and the file extension. Each question is
    # retried on its own, so a blank extension no longer forces the user to
    # type the folder name again.
    input_folder = _prompt_non_empty(
        "Please Enter your input folder name and press 'ENTER': "
    )
    files_extension = _prompt_non_empty(
        "Please Enter your files extensions(txt,xml or * for all): "
    )

    # Start the clock only after the prompts so the reported duration covers
    # the processing itself, not the time the user spent typing.
    run_start = time.perf_counter()

    # Plain string concatenation is kept from the original; presumably
    # constants.DATA_INPUT already ends with a path separator -- TODO confirm.
    input_folder = constants.DATA_INPUT + input_folder
    files_extension = "*." + files_extension

    iInput = InputHandler(input_folder, files_extension)
    input_data = iInput.get_input()

    pretreat_data = PatentHandler(input_data)
    clean_patent_data = pretreat_data.pretreat_data()

    process_data = CorpusProcessor(clean_patent_data, input_folder, files_extension)
    # The return value was never used by the original, so it is not stored.
    process_data.process_corpus()

    elapsed = time.perf_counter() - run_start
    print("Process is finished within %s seconds" % round(elapsed, 2))
# Script entry point: run the pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|