PZR0033 committed
Commit 5fb2a02 · 1 Parent(s): 7bfc81f

update in policy

Files changed (2)
  1. rl_agent/test_env.py +45 -0
  2. rl_agent/utils.py +1 -1
rl_agent/test_env.py ADDED
@@ -0,0 +1,45 @@
+from env import Environment
+from policy import Policy
+from utils import myOptimizer
+
+import pandas as pd
+import numpy as np
+
+if __name__ == "__main__":
+
+    data = pd.read_csv('./data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv')
+    # data['Local time'] = pd.to_datetime(data['Local time'])
+    data = data.set_index('Local time')
+    print(data.index.min(), data.index.max())
+
+    date_split = '19.09.2022 17:55:00.000 GMT-0500'
+    train = data[:date_split]
+    test = data[date_split:]
+    print(train.head(10))
+
+    learning_rate = 0.01
+    first_momentum = 0.0
+    second_momentum = 0.0
+    transaction_cost = 0.0001
+    adaptation_rate = 0.01
+    state_size = 9
+
+    agent = Policy(input_channels=state_size)
+    optimizer = myOptimizer(learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
+
+
+
+    history = []
+    for i in range(1, state_size):
+        c = train.iloc[i, :]['Close'] - train.iloc[i-1, :]['Close']
+        history.append(c)
+
+    env = Environment(train, history=history)
+    observation = env.reset()
+    for _ in range(9, 12):
+
+        action = agent(observation)
+        observation, reward, _ = env.step(action)
+
+    print(env.profits)
+
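The new test script drives three project modules that this commit does not touch (env.Environment, policy.Policy, utils.myOptimizer). As a reading aid only, the stubs below are hypothetical stand-ins inferred from the calls made in test_env.py, namely agent(observation), env.reset(), env.step(action) returning (observation, reward, done), and env.profits; they are not the repository's actual implementations.

import numpy as np

class Environment:
    """Hypothetical stub matching how test_env.py uses the real Environment."""

    def __init__(self, data, history):
        self.data = data                  # candlestick DataFrame indexed by 'Local time'
        self.history = list(history)      # seed window of close-to-close price changes
        self.profits = 0.0                # running profit read at the end of the script
        self.t = len(history)

    def reset(self):
        # Initial observation: the seeded window of price changes.
        return np.asarray(self.history, dtype=np.float32)

    def step(self, action):
        # Advance one bar, roll the window, and return (observation, reward, done).
        change = self.data.iloc[self.t]['Close'] - self.data.iloc[self.t - 1]['Close']
        self.history = self.history[1:] + [change]
        reward = float(action) * change   # position times price change (costs ignored here)
        self.profits += reward
        self.t += 1
        done = self.t >= len(self.data) - 1
        return np.asarray(self.history, dtype=np.float32), reward, done

class Policy:
    """Hypothetical stub; the real policy is presumably a learned model."""

    def __init__(self, input_channels):
        self.input_channels = input_channels

    def __call__(self, observation):
        # Map an observation to a position in [-1, 1] with a fixed rule.
        return float(np.tanh(np.sum(observation)))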
rl_agent/utils.py CHANGED
@@ -16,7 +16,7 @@ class myOptimizer():
         numerator = self.mu_square - (self.mu * reward)
         denominator = torch.sqrt((self.mu_square - (self.mu ** 2)) ** 3)
 
-        gradient = numerator / denominator
+        gradient = numerator / (denominator + 1e-8)
 
         self.mu = self.mu + self.adaptation_rate * (reward - self.mu)
         self.mu_square = self.mu_square + self.adaptation_rate * ((reward ** 2) - self.mu_square)
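The one-line change above adds a small epsilon to the denominator of the gradient. test_env.py constructs the optimizer with first_momentum = second_momentum = 0.0, so the running estimates self.mu and self.mu_square presumably start at zero; in that case mu_square - mu ** 2 is zero on the first update and the old expression divided by zero, yielding nan. A minimal standalone sketch of the guarded update is shown below; the function name and signature are illustrative, and only the expressions already visible in the hunk come from the repository.

import torch

def guarded_moment_gradient(mu, mu_square, reward, adaptation_rate, eps=1e-8):
    # Same expressions as the hunk above, pulled out of the class for illustration.
    numerator = mu_square - (mu * reward)
    denominator = torch.sqrt((mu_square - (mu ** 2)) ** 3)
    gradient = numerator / (denominator + eps)   # eps keeps the ratio finite when the variance is ~0

    # Exponentially weighted updates of the running mean and second moment.
    mu = mu + adaptation_rate * (reward - mu)
    mu_square = mu_square + adaptation_rate * ((reward ** 2) - mu_square)
    return gradient, mu, mu_square

# First step with zero-initialised moments: the unguarded version would return nan here.
g, mu, mu_sq = guarded_moment_gradient(torch.tensor(0.0), torch.tensor(0.0),
                                       torch.tensor(0.01), adaptation_rate=0.01)
print(g)   # tensor(0.)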