ngthanhtinqn committed on
Commit f7ed643 · 1 Parent(s): 2863f52

add equity

Files changed (2)
  1. rl_agent/env.py +3 -2
  2. rl_agent/test_env.py +42 -30
rl_agent/env.py CHANGED
@@ -29,14 +29,15 @@ class Environment:
 
         Zt = self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close']
         reward = (self.position_value * Zt) - (self.cost_rate * cost_amount)
-        profit = self.position_value * Zt
-        self.profits += profit
+        self.profit = self.position_value * Zt
+        self.profits += self.profit
 
         # set next time
         self.t += 1
         self.position_value = act
 
         self.history.pop(0)
+
         self.history.append(self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close']) # the price being traded
 
         self.position_value = self.position_value.item()
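
The env.py change promotes the per-step profit from a local variable to the instance attribute self.profit, so the training script can read it after every env.step() call (the equity logging below relies on this). A self-contained sketch of that pattern, with faked prices and a hypothetical TinyEnv class standing in for the repo's Environment:

# Sketch only: TinyEnv mirrors the profit-tracking pattern of rl_agent/env.py,
# but it is not the repo's Environment class and the prices are made up.
class TinyEnv:
    def __init__(self, closes):
        self.closes = closes
        self.t = 1
        self.position_value = 0.0
        self.profit = 0.0    # per-step P&L (new in this commit)
        self.profits = 0.0   # cumulative P&L

    def step(self, act):
        Zt = self.closes[self.t] - self.closes[self.t - 1]
        self.profit = self.position_value * Zt
        self.profits += self.profit
        self.t += 1
        self.position_value = act
        return Zt, self.profit

if __name__ == "__main__":
    env = TinyEnv([1.10, 1.11, 1.09, 1.12])
    for act in (1.0, -1.0, 0.5):
        env.step(act)
    print(env.profit, env.profits)   # last step's P&L and the running total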
rl_agent/test_env.py CHANGED
@@ -9,18 +9,26 @@ from collections import OrderedDict
 
 import matplotlib.pyplot as plt
 
+from tqdm import tqdm
+from torch.utils.tensorboard import SummaryWriter
+
 if __name__ == "__main__":
+    writer = SummaryWriter('runs/new_data_ex_7')
 
-    data = pd.read_csv('./data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv')
+    # data = pd.read_csv('./data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv')
+    data = pd.read_csv('./data/EURUSD_Candlestick_30_M_BID_01.01.2021-04.02.2023.csv')
     # data['Local time'] = pd.to_datetime(data['Local time'])
     data = data.set_index('Local time')
     print(data.index.min(), data.index.max())
 
-    date_split = '19.09.2022 17:55:00.000 GMT-0500'
+    # date_split = '19.09.2022 17:55:00.000 GMT-0500'
+    # date_split = '25.08.2022 04:30:00.000 GMT-0500' # 30 min
+
+    date_split = '03.02.2023 15:30:00.000 GMT-0600' # 30 min
+
     train = data[:date_split]
     test = data[date_split:]
 
-    initial_money = 10.0
 
     learning_rate = 0.001
     first_momentum = 0.0
@@ -28,6 +36,7 @@ if __name__ == "__main__":
     transaction_cost = 0.0001
     adaptation_rate = 0.01
     state_size = 15
+    equity = 1.0
 
     agent = Policy(input_channels=state_size)
     optimizer = myOptimizer(learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
@@ -50,10 +59,8 @@ if __name__ == "__main__":
         model_gradients_history.update({name: torch.zeros_like(param)})
 
 
-    p = []
-
 
-    for _ in range(state_size, 28800):
+    for i in tqdm(range(state_size, len(train))):
         observation = torch.as_tensor(observation).float()
         action = agent(observation)
         observation, reward, _ = env.step(action.data.to("cpu").numpy())
@@ -69,47 +76,52 @@ if __name__ == "__main__":
             param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
             checkpoint[name] = param
             model_gradients_history.update({name: grad_n})
-
-        p.append(env.profits)
+
+        if i > 10000:
+            equity += env.profit
+            writer.add_scalar('equity', equity, i)
+        else:
+            writer.add_scalar('equity', 1.0, i)
+
         optimizer.after_step(reward)
         agent.load_state_dict(checkpoint)
 
     ###########
     ###########
 
-    history = []
-    for i in range(1, state_size):
-        c = test.iloc[i, :]['Close'] - test.iloc[i-1, :]['Close']
-        history.append(c)
+    # history = []
+    # for i in range(1, state_size):
+    #     c = test.iloc[i, :]['Close'] - test.iloc[i-1, :]['Close']
+    #     history.append(c)
 
-    env = Environment(test, history=history, state_size=state_size)
-    observation = env.reset()
+    # env = Environment(test, history=history, state_size=state_size)
+    # observation = env.reset()
 
 
-    model_gradients_history = dict()
-    checkpoint = OrderedDict()
+    # model_gradients_history = dict()
+    # checkpoint = OrderedDict()
 
-    for name, param in agent.named_parameters():
-        model_gradients_history.update({name: torch.zeros_like(param)})
+    # for name, param in agent.named_parameters():
+    #     model_gradients_history.update({name: torch.zeros_like(param)})
 
-    for _ in range(state_size, 14400):
-        observation = torch.as_tensor(observation).float()
-        action = agent(observation)
-        observation, reward, _ = env.step(action.data.numpy())
+    # for _ in tqdm(range(state_size, len(test))):
+    #     observation = torch.as_tensor(observation).float()
+    #     action = agent(observation)
+    #     observation, reward, _ = env.step(action.data.numpy())
 
 
 
 
-        action.backward()
+    #     action.backward()
 
-        for name, param in agent.named_parameters():
+    #     for name, param in agent.named_parameters():
 
-            grad_n = param.grad
-            param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
-            checkpoint[name] = param
-            model_gradients_history.update({name: grad_n})
+    #         grad_n = param.grad
+    #         param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
+    #         checkpoint[name] = param
+    #         model_gradients_history.update({name: grad_n})
 
-        optimizer.after_step(reward)
-        agent.load_state_dict(checkpoint)
+    #     optimizer.after_step(reward)
+    #     agent.load_state_dict(checkpoint)
 
     print(env.profits)
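
On the test_env.py side, the commit wires that per-step profit into a TensorBoard equity curve: equity starts at 1.0, is logged flat for the first 10000 steps, and afterwards accumulates env.profit under the 'equity' tag. A self-contained sketch of the same logging pattern with a synthetic profit stream (the run name, warm-up constant, and numbers are illustrative only):

# Sketch of the equity-curve logging added in this commit. The profit stream is
# synthetic; in the repo it is env.profit read after each env.step() call.
import numpy as np
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

writer = SummaryWriter('runs/equity_sketch')       # hypothetical run directory
equity = 1.0                                       # starting equity, as in the commit
warmup = 10000                                     # burn-in threshold from the commit

rng = np.random.default_rng(0)
profits = rng.normal(0.0, 1e-4, size=20_000)       # stand-in for per-step env.profit

for i, profit in enumerate(tqdm(profits)):
    if i > warmup:
        equity += profit
        writer.add_scalar('equity', equity, i)
    else:
        writer.add_scalar('equity', 1.0, i)        # flat curve during burn-in

writer.close()

The resulting curve can then be inspected with `tensorboard --logdir runs`.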