from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

from env import Environment
from policy import Policy
from utils import myOptimizer
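
# env, policy and utils are local modules of this Space and are not shown
# here. The code below assumes roughly the following interfaces (a sketch
# inferred from the call sites, not the actual implementations):
#   Policy(input_channels)                 -- torch.nn.Module whose forward
#                                             pass maps an observation to a
#                                             scalar position/action tensor
#   Environment(data, history, state_size) -- .reset() returns the first
#                                             observation; .step(action)
#                                             returns (observation, reward,
#                                             done); exposes .profit (and
#                                             .profits) for bookkeeping
#   myOptimizer(lr, m1, m2, eta, cost)     -- .step(...) returns an additive
#                                             parameter update;
#                                             .after_step(reward) updates
#                                             internal statistics (see the
#                                             sketch at the end of this file)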
if __name__ == "__main__":

    writer = SummaryWriter('runs/new_data_ex_7')

    data = pd.read_csv('./data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv')
    # data = pd.read_csv('./data/EURUSD_Candlestick_30_M_BID_01.01.2021-04.02.2023.csv')

    # data['Local time'] = pd.to_datetime(data['Local time'])
    data = data.set_index('Local time')
    # Note: the index is still made of strings here, so min()/max() compare
    # lexicographically, not chronologically.
    print(data.index.min(), data.index.max())

    # Train/test split at a timestamp label (the exact string must match a
    # row in the index for label-based slicing to work).
    date_split = '01.09.2022 00:00:00.000 GMT-0500'
    # date_split = '25.08.2022 04:30:00.000 GMT-0500'  # 30 min data
    # date_split = '03.02.2023 15:30:00.000 GMT-0600'  # 30 min data
    train = data[:date_split]
    test = data[date_split:]
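
    # A hypothetical, more robust alternative (suggested by the commented-out
    # to_datetime line above): parse the timestamps so that the split is
    # chronological rather than label-based, e.g.
    #   data.index = pd.to_datetime(data.index,
    #                               format='%d.%m.%Y %H:%M:%S.%f GMT%z')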
    # Hyperparameters.
    learning_rate = 0.001
    first_momentum = 0.0       # initial value of the optimizer's first moment
    second_momentum = 0.0001   # initial value of the optimizer's second moment
    transaction_cost = 0.0001  # cost charged on position changes
    adaptation_rate = 0.01     # update rate of the optimizer's moving moments
    state_size = 15            # number of past price changes per observation

    equity = 1.0

    agent = Policy(input_channels=state_size)
    optimizer = myOptimizer(learning_rate, first_momentum, second_momentum,
                            adaptation_rate, transaction_cost)
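
    # The constructor arguments suggest a Moody & Saffell-style recurrent
    # reinforcement learning update: gradient ascent on a differential Sharpe
    # ratio whose running return moments start at first_momentum and
    # second_momentum and adapt at adaptation_rate. This is inferred from the
    # call sites below, not from the (unavailable) utils module; a sketch of
    # such an optimizer is given at the end of this file.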
    # Seed the environment history with the first state_size - 1
    # close-to-close price differences.
    history = []
    for i in range(1, state_size):
        c = train.iloc[i]['Close'] - train.iloc[i - 1]['Close']
        history.append(c)

    env = Environment(train, history=history, state_size=state_size)
    observation = env.reset()

    model_gradients_history = dict()
    checkpoint = OrderedDict()
    for name, param in agent.named_parameters():
        model_gradients_history.update({name: torch.zeros_like(param)})
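
    # The previous step's gradients are kept, presumably so the optimizer can
    # apply the recurrent chain rule: the reward at time t depends on both the
    # current position and the previous one, so the update needs the gradient
    # of the previous action as well as the current one.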
    for i in tqdm(range(state_size, len(train))):

        observation = torch.as_tensor(observation).float()
        action = agent(observation)
        observation, reward, _ = env.step(action.data.to("cpu").numpy())

        # Gradient of the scalar action w.r.t. the policy parameters. Without
        # zero_grad() the gradients would accumulate across iterations and
        # corrupt model_gradients_history.
        agent.zero_grad()
        action.backward()

        for name, param in agent.named_parameters():
            grad_n = param.grad
            # The optimizer returns an additive parameter update; collect the
            # new parameters in `checkpoint` rather than mutating them while
            # iterating over named_parameters().
            checkpoint[name] = param + optimizer.step(
                grad_n, reward, observation[-1], model_gradients_history[name])
            model_gradients_history.update({name: grad_n})

        # Only start accumulating equity after a burn-in period.
        if i > 10000:
            equity += env.profit
            writer.add_scalar('equity', equity, i)
        else:
            writer.add_scalar('equity', 1.0, i)

        optimizer.after_step(reward)
        # Apply the updated parameters to the live model each step, so that
        # learning is online.
        agent.load_state_dict(checkpoint)
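
    # Training curves can be inspected with TensorBoard, e.g.:
    #   tensorboard --logdir runs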
    ###########
    # Evaluation on the test split with continued online adaptation
    # (currently disabled).
    ###########
    # history = []
    # for i in range(1, state_size):
    #     c = test.iloc[i]['Close'] - test.iloc[i - 1]['Close']
    #     history.append(c)
    #
    # env = Environment(test, history=history, state_size=state_size)
    # observation = env.reset()
    #
    # model_gradients_history = dict()
    # checkpoint = OrderedDict()
    # for name, param in agent.named_parameters():
    #     model_gradients_history.update({name: torch.zeros_like(param)})
    #
    # for _ in tqdm(range(state_size, len(test))):
    #     observation = torch.as_tensor(observation).float()
    #     action = agent(observation)
    #     observation, reward, _ = env.step(action.data.to("cpu").numpy())
    #     agent.zero_grad()
    #     action.backward()
    #     for name, param in agent.named_parameters():
    #         grad_n = param.grad
    #         checkpoint[name] = param + optimizer.step(
    #             grad_n, reward, observation[-1], model_gradients_history[name])
    #         model_gradients_history.update({name: grad_n})
    #     optimizer.after_step(reward)
    #     agent.load_state_dict(checkpoint)
    print(env.profits)
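

# ---------------------------------------------------------------------------
# Appendix: utils.myOptimizer is not included in this Space. The class below
# is a hypothetical sketch of an implementation that is consistent with how
# the optimizer is called above -- step(grad, reward, last_return, prev_grad)
# and after_step(reward) -- following Moody & Saffell's recurrent
# reinforcement learning with a differential Sharpe ratio. It is NOT the
# author's actual code; the class name and the formulas are assumptions.
# ---------------------------------------------------------------------------
class MyOptimizerSketch:
    def __init__(self, lr, first_momentum, second_momentum, adaptation_rate,
                 transaction_cost):
        self.lr = lr
        self.a = first_momentum      # running first moment of returns, A_t
        self.b = second_momentum     # running second moment of returns, B_t
        self.eta = adaptation_rate
        self.cost = transaction_cost

    def step(self, grad, reward, last_return, prev_grad):
        # Sensitivity of the differential Sharpe ratio to the latest return:
        # dD_t/dR_t = (B - A * R_t) / (B - A^2)^(3/2).
        variance = self.b - self.a ** 2
        if variance <= 0:
            return torch.zeros_like(grad)
        dd_dr = (self.b - self.a * reward) / variance ** 1.5
        # Chain rule through the current and previous positions, with the
        # transaction cost entering via the position change (roughly):
        # dR_t/dF_t ~ -cost, dR_t/dF_{t-1} ~ r_t + cost.
        dr_df = -self.cost
        dr_df_prev = last_return + self.cost
        return self.lr * dd_dr * (dr_df * grad + dr_df_prev * prev_grad)

    def after_step(self, reward):
        # Exponentially adapt the return moments used by the Sharpe ratio.
        self.a += self.eta * (reward - self.a)
        self.b += self.eta * (reward ** 2 - self.b)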