PZR0033 committed on
Commit
2863f52
·
1 Parent(s): 8e3072c

improved stability

Browse files
Files changed (3) hide show
  1. rl_agent/policy.py +6 -6
  2. rl_agent/test_env.py +50 -4
  3. rl_agent/utils.py +1 -1
rl_agent/policy.py CHANGED
@@ -8,19 +8,19 @@ class Policy(nn.Module):
8
 
9
  super(Policy, self).__init__()
10
 
11
- self.layer1 = nn.Linear(input_channels, 2 * input_channels)
12
  self.tanh1 = nn.Tanh()
13
- self.layer2 = nn.Linear(2 * input_channels, 1)
14
- self.tanh2 = nn.Tanh()
15
 
16
  def forward(self, state):
17
 
18
  hidden = self.layer1(state)
19
  hidden = self.tanh1(hidden)
20
- hidden = self.layer2(hidden)
21
- action = self.tanh2(hidden)
22
 
23
- return action
24
 
25
 
26
 
 
8
 
9
  super(Policy, self).__init__()
10
 
11
+ self.layer1 = nn.Linear(input_channels, 1)
12
  self.tanh1 = nn.Tanh()
13
+ # self.layer2 = nn.Linear(2 * input_channels, 1)
14
+ # self.tanh2 = nn.Tanh()
15
 
16
  def forward(self, state):
17
 
18
  hidden = self.layer1(state)
19
  hidden = self.tanh1(hidden)
20
+ # hidden = self.layer2(hidden)
21
+ # action = self.tanh2(hidden)
22
 
23
+ return hidden
24
 
25
 
26
 
rl_agent/test_env.py CHANGED
@@ -7,6 +7,8 @@ import numpy as np
7
  import torch
8
  from collections import OrderedDict
9
 
 
 
10
  if __name__ == "__main__":
11
 
12
  data = pd.read_csv('./data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv')
@@ -19,10 +21,10 @@ if __name__ == "__main__":
19
  test = data[date_split:]
20
 
21
  initial_money = 10.0
22
-
23
- learning_rate = 0.01
24
  first_momentum = 0.0
25
- second_momentum = 0.0
26
  transaction_cost = 0.0001
27
  adaptation_rate = 0.01
28
  state_size = 15
@@ -47,7 +49,50 @@ if __name__ == "__main__":
47
  for name, param in agent.named_parameters():
48
  model_gradients_history.update({name: torch.zeros_like(param)})
49
 
50
- for _ in range(state_size, 1440):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  observation = torch.as_tensor(observation).float()
52
  action = agent(observation)
53
  observation, reward, _ = env.step(action.data.numpy())
@@ -64,6 +109,7 @@ if __name__ == "__main__":
64
  checkpoint[name] = param
65
  model_gradients_history.update({name: grad_n})
66
 
 
67
  agent.load_state_dict(checkpoint)
68
 
69
  print(env.profits)
 
7
  import torch
8
  from collections import OrderedDict
9
 
10
+ import matplotlib.pyplot as plt
11
+
12
  if __name__ == "__main__":
13
 
14
  data = pd.read_csv('./data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv')
 
21
  test = data[date_split:]
22
 
23
  initial_money = 10.0
24
+
25
+ learning_rate = 0.001
26
  first_momentum = 0.0
27
+ second_momentum = 0.0001
28
  transaction_cost = 0.0001
29
  adaptation_rate = 0.01
30
  state_size = 15
 
49
  for name, param in agent.named_parameters():
50
  model_gradients_history.update({name: torch.zeros_like(param)})
51
 
52
+
53
+ p = []
54
+
55
+
56
+ for _ in range(state_size, 28800):
57
+ observation = torch.as_tensor(observation).float()
58
+ action = agent(observation)
59
+ observation, reward, _ = env.step(action.data.to("cpu").numpy())
60
+
61
+
62
+
63
+
64
+ action.backward()
65
+
66
+ for name, param in agent.named_parameters():
67
+
68
+ grad_n = param.grad
69
+ param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
70
+ checkpoint[name] = param
71
+ model_gradients_history.update({name: grad_n})
72
+
73
+ p.append(env.profits)
74
+ optimizer.after_step(reward)
75
+ agent.load_state_dict(checkpoint)
76
+
77
+ ###########
78
+ ###########
79
+
80
+ history = []
81
+ for i in range(1, state_size):
82
+ c = test.iloc[i, :]['Close'] - test.iloc[i-1, :]['Close']
83
+ history.append(c)
84
+
85
+ env = Environment(test, history=history, state_size=state_size)
86
+ observation = env.reset()
87
+
88
+
89
+ model_gradients_history = dict()
90
+ checkpoint = OrderedDict()
91
+
92
+ for name, param in agent.named_parameters():
93
+ model_gradients_history.update({name: torch.zeros_like(param)})
94
+
95
+ for _ in range(state_size, 14400):
96
  observation = torch.as_tensor(observation).float()
97
  action = agent(observation)
98
  observation, reward, _ = env.step(action.data.numpy())
 
109
  checkpoint[name] = param
110
  model_gradients_history.update({name: grad_n})
111
 
112
+ optimizer.after_step(reward)
113
  agent.load_state_dict(checkpoint)
114
 
115
  print(env.profits)
rl_agent/utils.py CHANGED
@@ -15,7 +15,7 @@ class myOptimizer():
15
  numerator = self.mu_square - (self.mu * reward)
16
  denominator = np.sqrt((self.mu_square - (self.mu ** 2)) ** 3)
17
 
18
- gradient = numerator / (denominator + 1e-8)
19
 
20
  current_grad = (-1.0 * self.transaction_cost * grad_n)
21
 
 
15
  numerator = self.mu_square - (self.mu * reward)
16
  denominator = np.sqrt((self.mu_square - (self.mu ** 2)) ** 3)
17
 
18
+ gradient = numerator / denominator
19
 
20
  current_grad = (-1.0 * self.transaction_cost * grad_n)
21