from env import Environment
from policy import Policy
from utils import myOptimizer

import pandas as pd
import numpy as np
import torch
from collections import OrderedDict
import matplotlib.pyplot as plt


if __name__ == "__main__":

    # Load one-minute EUR/USD candlesticks and index them by timestamp.
    data = pd.read_csv('./data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv')
    # data['Local time'] = pd.to_datetime(data['Local time'])
    data = data.set_index('Local time')
    print(data.index.min(), data.index.max())

    # Split chronologically into a training and a test period.
    date_split = '19.09.2022 17:55:00.000 GMT-0500'
    train = data[:date_split]
    test = data[date_split:]

    # Hyperparameters for the policy and the custom optimizer.
    initial_money = 10.0
    learning_rate = 0.001
    first_momentum = 0.0
    second_momentum = 0.0001
    transaction_cost = 0.0001
    adaptation_rate = 0.01
    state_size = 15

    agent = Policy(input_channels=state_size)
    optimizer = myOptimizer(learning_rate, first_momentum, second_momentum,
                            adaptation_rate, transaction_cost)

    # Seed the environment with the first state_size - 1 close-to-close changes.
    history = []
    for i in range(1, state_size):
        c = train['Close'].iloc[i] - train['Close'].iloc[i - 1]
        history.append(c)

    env = Environment(train, history=history, state_size=state_size)
    observation = env.reset()

    # Per-parameter gradient from the previous step, plus the staged parameter
    # values that are applied to the agent once the loop finishes.
    model_gradients_history = dict()
    checkpoint = OrderedDict()

    for name, param in agent.named_parameters():
        model_gradients_history.update({name: torch.zeros_like(param)})

    # ----- Training loop over the train split -----
    p = []
    for _ in range(state_size, 28800):
        observation = torch.as_tensor(observation).float()
        action = agent(observation)
        observation, reward, _ = env.step(action.detach().cpu().numpy())

        # Clear accumulated gradients so each step uses only its own gradient.
        agent.zero_grad()
        action.backward()

        for name, param in agent.named_parameters():
            grad_n = param.grad
            # Stage the updated parameter; it is loaded into the agent after the loop.
            param = param + optimizer.step(grad_n, reward, observation[-1],
                                           model_gradients_history[name])
            checkpoint[name] = param
            model_gradients_history.update({name: grad_n})

        p.append(env.profits)
        optimizer.after_step(reward)

    agent.load_state_dict(checkpoint)

    # ----- Evaluation on the test split -----

    history = []
    for i in range(1, state_size):
        c = test['Close'].iloc[i] - test['Close'].iloc[i - 1]
        history.append(c)

    env = Environment(test, history=history, state_size=state_size)
    observation = env.reset()

    model_gradients_history = dict()
    checkpoint = OrderedDict()

    for name, param in agent.named_parameters():
        model_gradients_history.update({name: torch.zeros_like(param)})

    for _ in range(state_size, 14400):
        observation = torch.as_tensor(observation).float()
        action = agent(observation)
        observation, reward, _ = env.step(action.detach().cpu().numpy())

        agent.zero_grad()
        action.backward()

        for name, param in agent.named_parameters():
            grad_n = param.grad
            param = param + optimizer.step(grad_n, reward, observation[-1],
                                           model_gradients_history[name])
            checkpoint[name] = param
            model_gradients_history.update({name: grad_n})

        optimizer.after_step(reward)

    agent.load_state_dict(checkpoint)
    print(env.profits)
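
    # --- Optional: visualize the training profit curve ---------------------
    # The per-step profits collected in `p` and the matplotlib import are not
    # used anywhere above; the lines below are a minimal, assumed plotting
    # sketch (not part of the original training/evaluation logic), included
    # only as one way to inspect how profits evolved during training.
    plt.plot(p)
    plt.xlabel('Training step')
    plt.ylabel('Cumulative profit')
    plt.title('Training profits (train split)')
    plt.show()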