Spaces:
Sleeping
Sleeping
File size: 964 Bytes
a608bb4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
from hdf5_file_manager import HDF5TextManager
from text_analyzer import TextAnalyzer
from data_loader import DataLoader
from fast_text_trainer import FastTextTrainer
# Corpus settings. Neither name is referenced by the active statements in
# this section; they are kept as module-level values.
total_size = 2005
filename = "GPAC.txt"

# ---------------------------------------------------------------------------
# Disabled preprocessing steps (commented out, kept for reference only).
# NOTE(review): presumably these produced 'GPAC_fast.txt' in an earlier run;
# confirm before relying on that.
#
#   data_loader = DataLoader('GPAC', chunk_size=2005)
#   total, sentences, index = data_loader.load_hdf5()
#   print(total)
#   # print(sentences)
#   print(f"sentence count {len(sentences)}")
#
#   text_analyzer = TextAnalyzer(sentences)
#   # print(f"before space word count {len(text_analyzer.get_tokens())}")
#   cleaned_sentences = text_analyzer.get_sentences()
#   data_loader.save_fast_text_file('GPAC_fast.txt', cleaned_sentences)
#   print(f"after space word count {len(text_analyzer.get_tokens())}")
#
#   manager = HDF5TextManager('GPAC.h5', chunk_size=1000)
#   manager.save(sentences)
#   # Read the combined HDF5 file
#   manager.read_hdf5_file("combined.h5")
# ---------------------------------------------------------------------------

# Active step: build a trainer for 'GPAC_fast.txt' and run training.
# NOTE(review): dim / epoch / thread presumably correspond to the vector size,
# number of passes and worker-thread count; verify against FastTextTrainer.
fast_text = FastTextTrainer("GPAC_fast.txt")
fast_text.train_model(
    dim=300,
    epoch=10,
    thread=52,
)
# Initialize the summarizer