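# NOTE(review): the next three lines look like page-header residue from a web
# capture rather than script content; commented out so the file can parse.
# Spaces:
# Sleeping
# Sleeping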
# Script: train a fastText model on the cleaned GPAC corpus file.
#
# Only the training step at the bottom is active. The commented-out section in
# the middle is the earlier preprocessing pipeline; it is kept for reference
# because it is the code that writes 'GPAC_fast.txt', the file the training
# step reads.

from hdf5_file_manager import HDF5TextManager
from text_analyzer import TextAnalyzer
from data_loader import DataLoader
from fast_text_trainer import FastTextTrainer

# Not referenced by the active code in this script; kept as module-level names
# in case other code reads them.
filename = 'GPAC.txt'
total_size = 2005

# --- Disabled preprocessing pipeline (load -> clean -> save) -----------------
# data_loader=DataLoader('GPAC',chunk_size=2005)
# total,sentences,index=data_loader.load_hdf5()
# print(total)
# # # print(sentences)
# print(f"sentence count {len(sentences)}")
# text_analyzer=TextAnalyzer(sentences)
# # print(f"before space word count {len(text_analyzer.get_tokens())}")
# cleaned_sentences=text_analyzer.get_sentences()
# data_loader.save_fast_text_file('GPAC_fast.txt',cleaned_sentences)
# print(f"after space word count {len(text_analyzer.get_tokens())}")
# manager = HDF5TextManager('GPAC.h5',chunk_size=1000)
# manager.save(sentences)
# # Read the combined HDF5 file
# manager.read_hdf5_file("combined.h5")

# --- Active step: train on the already-prepared corpus file ------------------
# 'GPAC_fast.txt' must already exist; the disabled pipeline above is what
# writes it (see the save_fast_text_file call).
fast_text = FastTextTrainer('GPAC_fast.txt')

# Hyperparameters are passed straight through to FastTextTrainer.train_model.
# NOTE(review): thread=52 is hard-coded; presumably chosen for the original
# training machine -- confirm before running on different hardware.
fast_text.train_model(dim=300, epoch=10, thread=52)
# Initialize the summarizer |