"""Run a few policy steps on EUR/USD 1-minute candlestick data and print the profits."""

import numpy as np
import pandas as pd

from env import Environment
from policy import Policy
from utils import myOptimizer

if __name__ == "__main__":
    csv_path = './data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv'
    data = pd.read_csv(csv_path)

    # NOTE: datetime parsing is intentionally left disabled, so the index keeps
    # the raw 'Local time' strings from the CSV.  The split label below is
    # written in that same string format.
    # data['Local time'] = pd.to_datetime(data['Local time'])
    data = data.set_index('Local time')

    # With a string index, min()/max() compare lexicographically, not
    # chronologically.
    print(data.index.min(), data.index.max())

    date_split = '19.09.2022 17:55:00.000 GMT-0500'
    # Label-based slices include BOTH endpoints, so the row labelled
    # `date_split` is part of `train`.  Drop it from the head of the test
    # slice so the two sets do not share the boundary candle.
    train = data[:date_split]
    test = data[date_split:].iloc[1:]
    print(train.head(10))

    # Hyper-parameters handed to the optimizer.
    learning_rate = 0.01
    first_momentum = 0.0
    second_momentum = 0.0
    transaction_cost = 0.0001
    adaptation_rate = 0.01

    state_size = 9

    agent = Policy(input_channels=state_size)
    # NOTE(review): the optimizer is constructed but never used in this
    # script -- presumably training is wired up elsewhere; confirm.
    optimizer = myOptimizer(
        learning_rate,
        first_momentum,
        second_momentum,
        adaptation_rate,
        transaction_cost,
    )

    # Seed the environment with the first close-to-close price changes.
    # Select the 'Close' column once instead of materialising a whole row
    # Series twice per iteration.
    # NOTE(review): this yields state_size - 1 deltas (rows 1..state_size-1);
    # confirm that is the history length Environment expects.
    closes = train['Close']
    history = [
        closes.iloc[i] - closes.iloc[i - 1]
        for i in range(1, state_size)
    ]

    env = Environment(train, history=history)
    observation = env.reset()

    # Three interaction steps.  The loop variable has its own name so it is
    # not clobbered by the discarded third value returned from env.step().
    for _step in range(9, 12):
        action = agent(observation)
        observation, reward, _ = env.step(action)

    print(env.profits)