pmthangk09 committed on
Commit
0d1bfaa
·
1 Parent(s): d79400e

Initial integration

Browse files
Files changed (2) hide show
  1. app.py +61 -1
  2. rl_agent/test_env.py +4 -18
app.py CHANGED
@@ -3,14 +3,74 @@ import pandas as pd
3
  import plotly.graph_objects as go
4
  from plotly.subplots import make_subplots
5
 
 
 
 
 
 
 
 
 
6
 
7
  import datetime
8
  def get_time():
9
  return datetime.datetime.now().time()
10
 
11
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  counter = 0
13
- start_year, test_year = 2018, 2023
14
  datetime_column = "Date"
15
  df_data = pd.read_csv(f"./data/EURUSD_Candlestick_1_M_BID_01.01.{start_year}-04.02.2023_processed.csv")
16
  df_data[datetime_column] = pd.to_datetime(df_data[datetime_column], format="%Y-%m-%d") # %d.%m.%Y %H:%M:%S.000 GMT%z
 
3
  import plotly.graph_objects as go
4
  from plotly.subplots import make_subplots
5
 
6
+ from env import Environment
7
+ from policy import Policy
8
+ from utils import myOptimizer
9
+
10
+ import torch
11
+ from collections import OrderedDict
12
+ from tqdm import tqdm
13
+
14
 
15
  import datetime
16
  def get_time():
17
  return datetime.datetime.now().time()
18
 
19
 
20
+ def init_rl_agent(train, test):
21
+ date_split = '01.09.2022 00:00:00.000 GMT-0500'
22
+
23
+ learning_rate = 0.001
24
+ first_momentum = 0.0
25
+ second_momentum = 0.0001
26
+ transaction_cost = 0.0001
27
+ adaptation_rate = 0.01
28
+ state_size = 15
29
+ equity = 1.0
30
+
31
+ agent = Policy(input_channels=state_size)
32
+ optimizer = myOptimizer(learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
33
+
34
+ history = []
35
+ for i in range(1, state_size):
36
+ c = train.iloc[i, :]['Close'] - train.iloc[i - 1, :]['Close']
37
+ history.append(c)
38
+
39
+ env = Environment(train, history=history, state_size=state_size)
40
+ observation = env.reset()
41
+
42
+ return env, agent, optimizer, state_size, observation, date_split, equity
43
+
44
+
45
+ def make_prediction(env, agent, optimizer, state_size, observation, data, date_split, equity):
46
+ model_gradients_history = dict()
47
+ checkpoint = OrderedDict()
48
+
49
+ for name, param in agent.named_parameters():
50
+ model_gradients_history.update({name: torch.zeros_like(param)})
51
+
52
+ for i in tqdm(range(state_size, len(data[:date_split]))):
53
+ observation = torch.as_tensor(observation).float()
54
+ action = agent(observation)
55
+ observation, reward, _ = env.step(action.data.to("cpu").numpy())
56
+
57
+ action.backward()
58
+
59
+ for name, param in agent.named_parameters():
60
+ grad_n = param.grad
61
+ param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
62
+ checkpoint[name] = param
63
+ model_gradients_history.update({name: grad_n})
64
+
65
+ if i > 10000:
66
+ equity += env.profit
67
+
68
+ optimizer.after_step(reward)
69
+ agent.load_state_dict(checkpoint)
70
+
71
+
72
  counter = 0
73
+ start_year, test_year = 2021, 2023
74
  datetime_column = "Date"
75
  df_data = pd.read_csv(f"./data/EURUSD_Candlestick_1_M_BID_01.01.{start_year}-04.02.2023_processed.csv")
76
  df_data[datetime_column] = pd.to_datetime(df_data[datetime_column], format="%Y-%m-%d") # %d.%m.%Y %H:%M:%S.000 GMT%z
rl_agent/test_env.py CHANGED
@@ -12,6 +12,7 @@ import matplotlib.pyplot as plt
12
  from tqdm import tqdm
13
  from torch.utils.tensorboard import SummaryWriter
14
 
 
15
  if __name__ == "__main__":
16
  writer = SummaryWriter('runs/new_data_ex_7')
17
 
@@ -21,14 +22,13 @@ if __name__ == "__main__":
21
  data = data.set_index('Local time')
22
  print(data.index.min(), data.index.max())
23
 
24
- date_split = '19.09.2022 17:55:00.000 GMT-0500'
25
  # date_split = '25.08.2022 04:30:00.000 GMT-0500' # 30 min
26
  # date_split = '03.02.2023 15:30:00.000 GMT-0600' # 30 min
27
 
28
  train = data[:date_split]
29
  test = data[date_split:]
30
 
31
-
32
  learning_rate = 0.001
33
  first_momentum = 0.0
34
  second_momentum = 0.0001
@@ -40,8 +40,6 @@ if __name__ == "__main__":
40
  agent = Policy(input_channels=state_size)
41
  optimizer = myOptimizer(learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
42
 
43
-
44
-
45
  history = []
46
  for i in range(1, state_size):
47
  c = train.iloc[i, :]['Close'] - train.iloc[i-1, :]['Close']
@@ -49,7 +47,6 @@ if __name__ == "__main__":
49
 
50
  env = Environment(train, history=history, state_size=state_size)
51
  observation = env.reset()
52
-
53
 
54
  model_gradients_history = dict()
55
  checkpoint = OrderedDict()
@@ -57,20 +54,14 @@ if __name__ == "__main__":
57
  for name, param in agent.named_parameters():
58
  model_gradients_history.update({name: torch.zeros_like(param)})
59
 
60
-
61
-
62
  for i in tqdm(range(state_size, len(train))):
63
  observation = torch.as_tensor(observation).float()
64
  action = agent(observation)
65
  observation, reward, _ = env.step(action.data.to("cpu").numpy())
66
-
67
-
68
-
69
 
70
  action.backward()
71
 
72
  for name, param in agent.named_parameters():
73
-
74
  grad_n = param.grad
75
  param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
76
  checkpoint[name] = param
@@ -95,7 +86,6 @@ if __name__ == "__main__":
95
 
96
  # env = Environment(test, history=history, state_size=state_size)
97
  # observation = env.reset()
98
-
99
 
100
  # model_gradients_history = dict()
101
  # checkpoint = OrderedDict()
@@ -107,14 +97,9 @@ if __name__ == "__main__":
107
  # observation = torch.as_tensor(observation).float()
108
  # action = agent(observation)
109
  # observation, reward, _ = env.step(action.data.numpy())
110
-
111
-
112
-
113
-
114
  # action.backward()
115
 
116
  # for name, param in agent.named_parameters():
117
-
118
  # grad_n = param.grad
119
  # param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
120
  # checkpoint[name] = param
@@ -123,4 +108,5 @@ if __name__ == "__main__":
123
  # optimizer.after_step(reward)
124
  # agent.load_state_dict(checkpoint)
125
 
126
- print(env.profits)
 
 
12
  from tqdm import tqdm
13
  from torch.utils.tensorboard import SummaryWriter
14
 
15
+
16
  if __name__ == "__main__":
17
  writer = SummaryWriter('runs/new_data_ex_7')
18
 
 
22
  data = data.set_index('Local time')
23
  print(data.index.min(), data.index.max())
24
 
25
+ date_split = '01.09.2022 00:00:00.000 GMT-0500'
26
  # date_split = '25.08.2022 04:30:00.000 GMT-0500' # 30 min
27
  # date_split = '03.02.2023 15:30:00.000 GMT-0600' # 30 min
28
 
29
  train = data[:date_split]
30
  test = data[date_split:]
31
 
 
32
  learning_rate = 0.001
33
  first_momentum = 0.0
34
  second_momentum = 0.0001
 
40
  agent = Policy(input_channels=state_size)
41
  optimizer = myOptimizer(learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
42
 
 
 
43
  history = []
44
  for i in range(1, state_size):
45
  c = train.iloc[i, :]['Close'] - train.iloc[i-1, :]['Close']
 
47
 
48
  env = Environment(train, history=history, state_size=state_size)
49
  observation = env.reset()
 
50
 
51
  model_gradients_history = dict()
52
  checkpoint = OrderedDict()
 
54
  for name, param in agent.named_parameters():
55
  model_gradients_history.update({name: torch.zeros_like(param)})
56
 
 
 
57
  for i in tqdm(range(state_size, len(train))):
58
  observation = torch.as_tensor(observation).float()
59
  action = agent(observation)
60
  observation, reward, _ = env.step(action.data.to("cpu").numpy())
 
 
 
61
 
62
  action.backward()
63
 
64
  for name, param in agent.named_parameters():
 
65
  grad_n = param.grad
66
  param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
67
  checkpoint[name] = param
 
86
 
87
  # env = Environment(test, history=history, state_size=state_size)
88
  # observation = env.reset()
 
89
 
90
  # model_gradients_history = dict()
91
  # checkpoint = OrderedDict()
 
97
  # observation = torch.as_tensor(observation).float()
98
  # action = agent(observation)
99
  # observation, reward, _ = env.step(action.data.numpy())
 
 
 
 
100
  # action.backward()
101
 
102
  # for name, param in agent.named_parameters():
 
103
  # grad_n = param.grad
104
  # param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
105
  # checkpoint[name] = param
 
108
  # optimizer.after_step(reward)
109
  # agent.load_state_dict(checkpoint)
110
 
111
+ print(env.profits)
112
+