# Driver script: constructs a FastTextTrainer for 'GPAC_fast.txt' and calls
# its train_model() method. The data-preparation statements that used to run
# before training are kept further down, commented out, for reference.

from hdf5_file_manager import HDF5TextManager
from text_analyzer import TextAnalyzer
from data_loader import DataLoader
from fast_text_trainer import FastTextTrainer

# Neither constant is read by an active statement in this part of the file;
# total_size equals the chunk_size used in the disabled DataLoader call below.
filename = 'GPAC.txt'
total_size = 2005

# --- Disabled preparation steps (kept for reference) -----------------------
# These lines load sentences through DataLoader, pass them through
# TextAnalyzer, and write 'GPAC_fast.txt' via save_fast_text_file(); the
# HDF5TextManager calls save/read the sentences as HDF5.
#
# data_loader = DataLoader('GPAC', chunk_size=2005)
# total, sentences, index = data_loader.load_hdf5()
# print(total)
# # print(sentences)
# print(f"sentence count {len(sentences)}")
# text_analyzer = TextAnalyzer(sentences)
# # print(f"before space word count {len(text_analyzer.get_tokens())}")
# cleaned_sentences = text_analyzer.get_sentences()
# data_loader.save_fast_text_file('GPAC_fast.txt', cleaned_sentences)
# print(f"after space word count {len(text_analyzer.get_tokens())}")
# manager = HDF5TextManager('GPAC.h5', chunk_size=1000)
# manager.save(sentences)
# # Read the combined HDF5 file
# manager.read_hdf5_file("combined.h5")
# ---------------------------------------------------------------------------

# Active step: train on the prepared text file.
# Presumably dim is the embedding size, epoch the number of passes and thread
# the worker count -- confirm against FastTextTrainer.train_model.
# NOTE(review): thread=52 is hard-coded; verify it suits the machine used.
fast_text = FastTextTrainer('GPAC_fast.txt')
fast_text.train_model(
    dim=300,
    epoch=10,
    thread=52,
)

# Initialize the summarizer