PZR0033 committed
Commit 8e3072c · 1 Parent(s): 634b732

final version without initial equity

Files changed (3):
  1. rl_agent/env.py       +3 -2
  2. rl_agent/test_env.py  +29 -8
  3. rl_agent/utils.py     +11 -10
rl_agent/env.py CHANGED
@@ -4,10 +4,11 @@ import torch
 
 class Environment:
 
-    def __init__(self, data, history_t=8, history=[0.1, 0.2, -0.1, -0.2, 0., 0.5, 0.9]):
+    def __init__(self, data, history_t=8, history=[0.1, 0.2, -0.1, -0.2, 0., 0.5, 0.9], state_size=9):
         self.data = data
         self.history = history
         self.history_t = history_t
+        self.state_size = state_size
         self.cost_rate = 0.0001
         self.reset()
 
@@ -16,7 +17,7 @@ class Environment:
         self.done = False
         self.profits = 0
         self.position_value = 0.
-        self.history = self.history[:7]
+        self.history = self.history[:self.state_size - 1]
         return [self.position_value] + self.history  # obs
 
     def step(self, act):
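The observation length is now driven by `state_size` instead of the hard-coded `[:7]` slice: `reset()` returns `[position_value]` plus the first `state_size - 1` history entries. A minimal sketch of that relationship, using only the values visible in the diff (the rest of `Environment` is assumed unchanged):

```python
# Sketch: how state_size controls the observation length after this commit.
# The values below are the defaults shown in the diff, not new behaviour.
history = [0.1, 0.2, -0.1, -0.2, 0., 0.5, 0.9]   # default history (7 entries)
state_size = 9

position_value = 0.
obs = [position_value] + history[:state_size - 1]

# With the 7-entry default history the slice [:8] still yields only 7 values,
# so len(obs) is 8 rather than state_size. Callers such as test_env.py pass a
# longer history built from price differences, which is what makes
# state_size=15 observations possible.
print(len(obs))   # -> 8 with the defaults above
```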
rl_agent/test_env.py CHANGED
@@ -5,6 +5,7 @@ from utils import myOptimizer
 import pandas as pd
 import numpy as np
 import torch
+from collections import OrderedDict
 
 if __name__ == "__main__":
 
@@ -16,16 +17,17 @@ if __name__ == "__main__":
     date_split = '19.09.2022 17:55:00.000 GMT-0500'
     train = data[:date_split]
     test = data[date_split:]
-    print(train.head(10))
 
+    initial_money = 10.0
+
     learning_rate = 0.01
     first_momentum = 0.0
     second_momentum = 0.0
     transaction_cost = 0.0001
     adaptation_rate = 0.01
-    state_size = 8
+    state_size = 15
 
-    agent = Policy(input_channels=state_size).float()
+    agent = Policy(input_channels=state_size)
     optimizer = myOptimizer(learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
 
 
@@ -35,14 +37,33 @@ if __name__ == "__main__":
         c = train.iloc[i, :]['Close'] - train.iloc[i-1, :]['Close']
         history.append(c)
 
-    env = Environment(train, history=history)
+    env = Environment(train, history=history, state_size=state_size)
     observation = env.reset()
+
+    model_gradients_history = dict()
+    checkpoint = OrderedDict()
+
+    for name, param in agent.named_parameters():
+        model_gradients_history.update({name: torch.zeros_like(param)})
 
-    for _ in range(9, 12):
-        print(type(observation))
-        observation = torch.as_tensor(observation)
+    for _ in range(state_size, 1440):
+        observation = torch.as_tensor(observation).float()
         action = agent(observation)
         observation, reward, _ = env.step(action.data.numpy())
 
-    print(env.profits)
+        action.backward()
+
+        for name, param in agent.named_parameters():
+            grad_n = param.grad
+            param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
+            checkpoint[name] = param
+            model_gradients_history.update({name: grad_n})
+
+        agent.load_state_dict(checkpoint)
+
+    print(env.profits)
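The new loop applies parameter updates by hand: it backpropagates through `action` to populate `param.grad`, asks `myOptimizer.step` for an additive update per parameter, collects the new values in an `OrderedDict`, and writes them back with `load_state_dict`. A compressed sketch of that pattern with a stand-in linear policy (the layer, its sizes, and the placeholder reward below are illustrative, not the repo's `Policy`):

```python
# Sketch of the manual "backward -> per-parameter update -> load_state_dict"
# pattern used in test_env.py. nn.Linear is a stand-in for the repo's Policy.
from collections import OrderedDict
import torch
import torch.nn as nn

policy = nn.Linear(4, 1)                       # stand-in policy network
prev_grads = {n: torch.zeros_like(p) for n, p in policy.named_parameters()}

obs = torch.randn(4)
action = policy(obs)
action.backward()                              # fills param.grad for every parameter

lr, reward = 0.01, 0.05                        # placeholder values
checkpoint = OrderedDict()
with torch.no_grad():
    for name, param in policy.named_parameters():
        grad_n = param.grad
        update = lr * reward * grad_n          # stand-in for optimizer.step(...)
        checkpoint[name] = param + update      # new value for this parameter
        prev_grads[name] = grad_n.clone()      # kept for the next step's update

policy.load_state_dict(checkpoint)             # write the updated weights back
```

Worth noting: `load_state_dict` copies values into the existing parameters and does not reset `param.grad`, so successive `backward()` calls accumulate gradients unless they are zeroed between iterations.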
rl_agent/utils.py CHANGED
@@ -8,27 +8,28 @@ class myOptimizer():
         self.mu = mu
         self.mu_square = mu_square
         self.adaptation_rate = adaptation_rate
-        self.last_gradient = 0.0
         self.transaction_cost = transaction_cost
 
-    def step(self, grad_n, reward, last_observation):
+    def step(self, grad_n, reward, last_observation, last_gradient):
 
         numerator = self.mu_square - (self.mu * reward)
-        denominator = torch.sqrt((self.mu_square - (self.mu ** 2)) ** 3)
+        denominator = np.sqrt((self.mu_square - (self.mu ** 2)) ** 3)
 
         gradient = numerator / (denominator + 1e-8)
 
-        self.mu = self.mu + self.adaptation_rate * (reward - self.mu)
-        self.mu_square = self.mu_square + self.adaptation_rate * ((reward ** 2) - self.mu_square)
-
         current_grad = (-1.0 * self.transaction_cost * grad_n)
-        previous_grad = (last_observation + self.transaction_cost) * self.last_gradient
 
-        gradient = gradient * (current_grad + previous_grad)
+        previous_grad = (last_observation + self.transaction_cost) * last_gradient
+
+        gradient = torch.as_tensor(gradient) * (current_grad + previous_grad)
 
-        self.last_gradient = grad_n
+        return torch.as_tensor(self.lr * gradient)
 
-        return self.lr * gradient
+    def after_step(self, reward):
+
+        self.mu = self.mu + self.adaptation_rate * (reward - self.mu)
+        self.mu_square = self.mu_square + self.adaptation_rate * ((reward ** 2) - self.mu_square)
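`step` now computes only the update direction, while the running moments `mu` (mean reward) and `mu_square` (mean squared reward) are refreshed separately in `after_step`; the numerator/denominator pair resembles the derivative of a Sharpe-ratio-style objective with respect to the latest reward, built from those two exponential moving averages. Remembering the previous gradient is likewise pushed to the caller via the new `last_gradient` argument (in the test_env.py hunk above it comes from `model_gradients_history`). A hedged sketch of the call order this implies, with placeholder gradients, reward, and observation (assumed to run from `rl_agent/`, where `utils` is importable as in test_env.py):

```python
# Sketch of the call order implied by this commit: step() per parameter,
# then after_step() once per time step to refresh the running moments.
# grad_n, reward and last_observation are placeholders, not repo values.
import torch

from utils import myOptimizer

# Positional args as in test_env.py:
# (learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
opt = myOptimizer(0.01, 0.0, 0.0, 0.01, 0.0001)

last_gradient = torch.zeros(3)      # per-parameter state now kept by the caller
grad_n = torch.ones(3)              # placeholder gradient for one parameter
reward = 0.05                       # placeholder reward from env.step
last_observation = 0.1              # last element of the observation vector

update = opt.step(grad_n, reward, last_observation, last_gradient)
last_gradient = grad_n              # remembered for the next call
opt.after_step(reward)              # update mu and mu_square once per step
```

As far as the hunks shown go, the training loop in test_env.py never calls `after_step`, so `mu` and `mu_square` would stay at their initial values of 0.0 there; presumably it is meant to be called once per iteration after the parameter updates.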