Spaces · Runtime error

Commit 5fb2a02 · 1 Parent(s): 7bfc81f
Committed by PZR0033

update in policy

Files changed:
- rl_agent/test_env.py +45 -0
- rl_agent/utils.py +1 -1
rl_agent/test_env.py
ADDED
@@ -0,0 +1,45 @@
+from env import Environment
+from policy import Policy
+from utils import myOptimizer
+
+import pandas as pd
+import numpy as np
+
+if __name__ == "__main__":
+
+    data = pd.read_csv('./data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv')
+    # data['Local time'] = pd.to_datetime(data['Local time'])
+    data = data.set_index('Local time')
+    print(data.index.min(), data.index.max())
+
+    date_split = '19.09.2022 17:55:00.000 GMT-0500'
+    train = data[:date_split]
+    test = data[date_split:]
+    print(train.head(10))
+
+    learning_rate = 0.01
+    first_momentum = 0.0
+    second_momentum = 0.0
+    transaction_cost = 0.0001
+    adaptation_rate = 0.01
+    state_size = 9
+
+    agent = Policy(input_channels=state_size)
+    optimizer = myOptimizer(learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
+
+
+
+    history = []
+    for i in range(1, state_size):
+        c = train.iloc[i, :]['Close'] - train.iloc[i-1, :]['Close']
+        history.append(c)
+
+    env = Environment(train, history=history)
+    observation = env.reset()
+    for _ in range(9, 12):
+
+        action = agent(observation)
+        observation, reward, _ = env.step(action)
+
+    print(env.profits)
+
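Note on the added test script: env.Environment, policy.Policy, and utils.myOptimizer are repo-local modules under rl_agent/ that are not part of this commit, so the file above only runs next to them and next to the EURUSD CSV. The sketch below mirrors the same data flow on synthetic prices using hypothetical stand-ins (StubPolicy and StubEnvironment are illustrations, not the project's actual classes): a warm-up history of state_size - 1 close-to-close differences, then three environment steps driven by the policy's action, as in the loop over range(9, 12).

import numpy as np
import pandas as pd

class StubPolicy:
    """Hypothetical stand-in for rl_agent/policy.Policy (not in this commit)."""
    def __call__(self, observation):
        # Naive rule: go long if the recent price changes are positive on average.
        return 1.0 if float(np.mean(observation)) > 0 else -1.0

class StubEnvironment:
    """Hypothetical stand-in for rl_agent/env.Environment (not in this commit)."""
    def __init__(self, data, history):
        self.data = data
        self.history = list(history)   # last state_size - 1 close-to-close diffs
        self.t = len(history) + 1      # index of the next bar to consume
        self.profits = 0.0

    def reset(self):
        return np.asarray(self.history, dtype=np.float32)

    def step(self, action):
        # Reward: the next close-to-close change scaled by the chosen position.
        diff = self.data['Close'].iloc[self.t] - self.data['Close'].iloc[self.t - 1]
        reward = float(action) * float(diff)
        self.profits += reward
        self.history = self.history[1:] + [float(diff)]
        self.t += 1
        return np.asarray(self.history, dtype=np.float32), reward, False

# Synthetic candles in place of the EURUSD CSV used by test_env.py.
rng = np.random.default_rng(0)
train = pd.DataFrame({'Close': 1.10 + rng.normal(0, 1e-4, 64).cumsum()})

state_size = 9
history = [train.iloc[i]['Close'] - train.iloc[i - 1]['Close']
           for i in range(1, state_size)]   # 8 warm-up differences, as in the script

agent = StubPolicy()
env = StubEnvironment(train, history=history)
observation = env.reset()
for _ in range(9, 12):                      # three steps, mirroring test_env.py
    action = agent(observation)
    observation, reward, _ = env.step(action)

print(env.profits)

As the diff stands, the myOptimizer instance is created but never called inside the stepping loop, so the script only exercises the environment and the forward pass of the policy.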
rl_agent/utils.py
CHANGED
@@ -16,7 +16,7 @@ class myOptimizer():
         numerator = self.mu_square - (self.mu * reward)
         denominator = torch.sqrt((self.mu_square - (self.mu ** 2)) ** 3)
 
-        gradient = numerator / denominator
+        gradient = numerator / (denominator + 1e-8)
 
         self.mu = self.mu + self.adaptation_rate * (reward - self.mu)
         self.mu_square = self.mu_square + self.adaptation_rate * ((reward ** 2) - self.mu_square)
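The one-line change in utils.py adds a small epsilon to the denominator of the gradient, which has the form of a differential-Sharpe-ratio-style gradient built from running estimates of the reward's first and second moments. When self.mu_square is (nearly) equal to self.mu ** 2, the variance term under the square root collapses to zero and the original division returns inf or NaN, poisoning every subsequent update. A minimal sketch of the failure mode and the effect of the guard (the numbers are hypothetical, chosen so the variance term is exactly zero):

import torch

# Values chosen so the running variance term is exactly zero, which is the
# case the added 1e-8 guards against (hypothetical numbers, not from the repo).
mu = torch.tensor(0.5)          # running first moment of the reward
mu_square = torch.tensor(0.25)  # running second moment, here exactly mu ** 2
reward = torch.tensor(1.0)

numerator = mu_square - (mu * reward)
denominator = torch.sqrt((mu_square - (mu ** 2)) ** 3)

print(numerator / denominator)           # tensor(-inf): the gradient blows up
print(numerator / (denominator + 1e-8))  # finite, as in the patched utils.py

An alternative would be to clamp the variance term to a small positive floor before the square root; the 1e-8 guard only prevents the exact division by zero and still produces very large gradients in its neighbourhood.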