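"""Train a sequence model (GRU, LSTM, or stacked LSTM-GRU) on the 'Close'
column of every CSV in ./datasets, then persist the trained model, the fitted
scalers, and the post-processed series."""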
import os
import joblib
import argparse
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from training.trainer import train
from training.post_processor import save_json, inverse_transform
from training.data_processor import (
    scale_data,
    get_datasets,
    preprocess_data,
    create_sequences
)
from training.model_builder import (
    gru_model,
    lstm_model,
    lstm_gru_model
)
from warnings import filterwarnings
filterwarnings('ignore')

async def main(algorithm: str, sequence_length: int, epochs: int, batch_size: int):
    datasets_path = './datasets'
    models_path = './models'
    posttrained = './posttrained'
    pickle_file = './pickles'

    for dataset in await get_datasets(datasets_path):
        print(f"[TRAINING] {dataset.replace('.csv', '')} ")

        # Keep only the 'Close' column, indexed by date.
        dataframe = pd.read_csv(os.path.join(datasets_path, dataset), index_col='Date')[['Close']]
        model_file = os.path.join(models_path, f"{dataset.replace('.csv', '')}.keras")

        # dataframe = preprocess_data(dataframe)
        dataframe.dropna(inplace=True)

        # Scale in two passes: standardize first, then squash into [0, 1].
        standard_scaler, dataframe = await scale_data(dataframe, StandardScaler)
        minmax_scaler, dataframe = await scale_data(dataframe, MinMaxScaler)

        # Build sliding windows of `sequence_length` steps and their targets.
        sequences, labels = await create_sequences(dataframe, sequence_length)
        input_shape = (sequences.shape[1], sequences.shape[2])

        if algorithm == "GRU":
            model = await gru_model(input_shape)
        elif algorithm == "LSTM":
            model = await lstm_model(input_shape)
        elif algorithm == "LSTM_GRU":
            model = await lstm_gru_model(input_shape)
        else:
            # Unrecognized algorithm names fall back to LSTM.
            model = await lstm_model(input_shape)

        # Chronological 80/20 split; time-series data is not shuffled.
        train_size = int(len(sequences) * 0.8)
        X_train, X_test = sequences[:train_size], sequences[train_size:]
        y_train, y_test = labels[:train_size], labels[train_size:]

        await train({
            'model': model,
            'model_file': model_file,
            'sequence_length': sequence_length,
            'epochs': epochs,
            'batch_size': batch_size
        }, X_train, y_train, X_test, y_test)

        # Persist the scaled series alongside its dates for post-training use.
        dataframe_json = {'Date': dataframe.index.tolist(), 'Close': dataframe['Close'].tolist()}
        await save_json(
            os.path.join(posttrained, f'{dataset.replace(".csv", "")}-posttrained.json'),
            dataframe_json
        )

        # Save both fitted scalers so the transforms can be reversed at inference time.
        joblib.dump(minmax_scaler, os.path.join(pickle_file, f'{dataset.replace(".csv", "")}_minmax_scaler.pickle'))
        joblib.dump(standard_scaler, os.path.join(pickle_file, f'{dataset.replace(".csv", "")}_standard_scaler.pickle'))

        # Reload the weights written during training, then save the full model.
        model.load_weights(model_file)
        model.save(model_file)

        print("\n\n")
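
# argparse is imported at the top of the file, but no CLI entry point appears
# in this view. A minimal sketch of one, under the assumption that the four
# main() parameters map directly to flags — the flag names and defaults below
# are guesses, not the author's:
import asyncio

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train sequence models on the CSVs in ./datasets')
    parser.add_argument('--algorithm', default='LSTM', choices=['GRU', 'LSTM', 'LSTM_GRU'])
    parser.add_argument('--sequence_length', type=int, default=60)  # assumed default
    parser.add_argument('--epochs', type=int, default=100)          # assumed default
    parser.add_argument('--batch_size', type=int, default=32)       # assumed default
    args = parser.parse_args()

    # main() is a coroutine, so drive it with asyncio.run().
    asyncio.run(main(args.algorithm, args.sequence_length, args.epochs, args.batch_size))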