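"""Train a sequence model (GRU, LSTM, or stacked LSTM-GRU) on the 'Close'
column of every CSV in ./datasets, then persist the trained model, the fitted
scalers, and the post-processed series."""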
import os
import joblib
import argparse
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from training.trainer import train
from training.post_processor import save_json, inverse_transform
from training.data_processor import (
    scale_data,
    get_datasets,
    preprocess_data,
    create_sequences
)
from training.model_builder import (
    gru_model,
    lstm_model,
    lstm_gru_model
)
from warnings import filterwarnings
filterwarnings('ignore')

async def main(algorithm: str, sequence_length: int, epochs: int, batch_size: int):
    datasets_path = './datasets'
    models_path = './models'
    posttrained = './posttrained'
    pickle_file = './pickles'

    for dataset in await get_datasets(datasets_path):
        print(f"[TRAINING] {dataset.replace('.csv', '')} ")

        # Keep only the 'Close' column, indexed by date.
        dataframe = pd.read_csv(os.path.join(datasets_path, dataset), index_col='Date')[['Close']]
        model_file = os.path.join(models_path, f"{dataset.replace('.csv', '')}.keras")

        # dataframe = preprocess_data(dataframe)
        dataframe.dropna(inplace=True)

        # Scale in two passes: standardize first, then squash into [0, 1].
        standard_scaler, dataframe = await scale_data(dataframe, StandardScaler)
        minmax_scaler, dataframe = await scale_data(dataframe, MinMaxScaler)

        # Build sliding windows of `sequence_length` steps and their targets.
        sequences, labels = await create_sequences(dataframe, sequence_length)
        input_shape = (sequences.shape[1], sequences.shape[2])

        if algorithm == "GRU":
            model = await gru_model(input_shape)
        elif algorithm == "LSTM":
            model = await lstm_model(input_shape)
        elif algorithm == "LSTM_GRU":
            model = await lstm_gru_model(input_shape)
        else:
            # Unrecognized algorithm names fall back to LSTM.
            model = await lstm_model(input_shape)

        # Chronological 80/20 split; time-series data is not shuffled.
        train_size = int(len(sequences) * 0.8)
        X_train, X_test = sequences[:train_size], sequences[train_size:]
        y_train, y_test = labels[:train_size], labels[train_size:]

        await train({
            'model': model,
            'model_file': model_file,
            'sequence_length': sequence_length,
            'epochs': epochs,
            'batch_size': batch_size
        }, X_train, y_train, X_test, y_test)

        # Persist the scaled series alongside its dates for post-training use.
        dataframe_json = {'Date': dataframe.index.tolist(), 'Close': dataframe['Close'].tolist()}
        await save_json(
            os.path.join(posttrained, f'{dataset.replace(".csv", "")}-posttrained.json'),
            dataframe_json
        )

        # Save both fitted scalers so the transforms can be reversed at inference time.
        joblib.dump(minmax_scaler, os.path.join(pickle_file, f'{dataset.replace(".csv", "")}_minmax_scaler.pickle'))
        joblib.dump(standard_scaler, os.path.join(pickle_file, f'{dataset.replace(".csv", "")}_standard_scaler.pickle'))

        # Reload the weights written during training, then save the full model.
        model.load_weights(model_file)
        model.save(model_file)

        print("\n\n")
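
# argparse is imported at the top of the file, but no CLI entry point appears
# in this view. A minimal sketch of one, under the assumption that the four
# main() parameters map directly to flags — the flag names and defaults below
# are guesses, not the author's:
import asyncio

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Train sequence models on the CSVs in ./datasets')
    parser.add_argument('--algorithm', default='LSTM', choices=['GRU', 'LSTM', 'LSTM_GRU'])
    parser.add_argument('--sequence_length', type=int, default=60)  # assumed default
    parser.add_argument('--epochs', type=int, default=100)          # assumed default
    parser.add_argument('--batch_size', type=int, default=32)       # assumed default
    args = parser.parse_args()

    # main() is a coroutine, so drive it with asyncio.run().
    asyncio.run(main(args.algorithm, args.sequence_length, args.epochs, args.batch_size))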