pmthangk09 committed on
Commit ceeea5f
1 Parent(s): 0d1bfaa

Finish integration

Files changed (1)
  1. app.py +90 -39
app.py CHANGED
@@ -10,6 +10,7 @@ from utils import myOptimizer
  import torch
  from collections import OrderedDict
  from tqdm import tqdm
+ import statistics


  import datetime
@@ -17,57 +18,53 @@ def get_time():
      return datetime.datetime.now().time()


- def init_rl_agent(train, test):
-     date_split = '01.09.2022 00:00:00.000 GMT-0500'
-
-     learning_rate = 0.001
-     first_momentum = 0.0
-     second_momentum = 0.0001
-     transaction_cost = 0.0001
-     adaptation_rate = 0.01
-     state_size = 15
-     equity = 1.0
-
-     agent = Policy(input_channels=state_size)
-     optimizer = myOptimizer(learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
-
-     history = []
-     for i in range(1, state_size):
-         c = train.iloc[i, :]['Close'] - train.iloc[i - 1, :]['Close']
-         history.append(c)
-
-     env = Environment(train, history=history, state_size=state_size)
-     observation = env.reset()
-
-     return env, agent, optimizer, state_size, observation, date_split, equity
-
-
- def make_prediction(env, agent, optimizer, state_size, observation, data, date_split, equity):
-     model_gradients_history = dict()
-     checkpoint = OrderedDict()
-
+ def pretrain_rl_agent():
+     global equity
+     observations = env_train.reset()
+
+     for _ in tqdm(range(state_size, len(train))):
+         observations = torch.as_tensor(observations).float()
+         action = agent(observations)
+         observations, reward, _ = env_train.step(action.data.to("cpu").numpy())
+
+         action.backward()
+
+         for name, param in agent.named_parameters():
+             grad_n = param.grad
+             param = param + optimizer.step(grad_n, reward, observations[-1], model_gradients_history[name])
+             checkpoint[name] = param
+             model_gradients_history.update({name: grad_n})
+
+         # equity += env_train.profit
+         optimizer.after_step(reward)
+         agent.load_state_dict(checkpoint)
+
+
+ def make_prediction(observations):
+     # observations: 0-14
+     action = agent(torch.as_tensor(observations).float())
+     # returned observation: 1-15
+     observations, reward, _ = env_test.step(action.data.to("cpu").numpy())
+
+     action.backward()
+
      for name, param in agent.named_parameters():
-         model_gradients_history.update({name: torch.zeros_like(param)})
-
-     for i in tqdm(range(state_size, len(data[:date_split]))):
-         observation = torch.as_tensor(observation).float()
-         action = agent(observation)
-         observation, reward, _ = env.step(action.data.to("cpu").numpy())
-
-         action.backward()
-
-         for name, param in agent.named_parameters():
-             grad_n = param.grad
-             param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
-             checkpoint[name] = param
-             model_gradients_history.update({name: grad_n})
-
-         if i > 10000:
-             equity += env.profit
-
-         optimizer.after_step(reward)
-         agent.load_state_dict(checkpoint)
-
+         grad_n = param.grad
+         param = param + optimizer.step(grad_n, reward, observations[-1], model_gradients_history[name])
+         checkpoint[name] = param
+         model_gradients_history.update({name: grad_n})
+
+     # equity += env_test.profit
+     optimizer.after_step(reward)
+     agent.load_state_dict(checkpoint)
+
+     return action, observations  # [-1.0, 1.0] * leverage
+
+
+ # ----------------------------------------------------------------------------------------------------------------------
+ # For visualization
+ # ----------------------------------------------------------------------------------------------------------------------
+ profit = 0.0

  counter = 0
  start_year, test_year = 2021, 2023
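In both pretrain_rl_agent() and the new make_prediction(), the assignment `param = param + optimizer.step(...)` only rebinds the loop variable; the network weights actually change when `agent.load_state_dict(checkpoint)` copies the values stored in `checkpoint` back into the model. A minimal in-place sketch of the same update, assuming `myOptimizer.step()` returns an additive delta as the committed code implies (`apply_manual_update` and its arguments are illustrative names, not part of the commit):

    import torch

    def apply_manual_update(agent, optimizer, reward, last_obs, model_gradients_history):
        # Same per-parameter update as the committed loop, applied in place so no
        # checkpoint/load_state_dict round-trip is needed on every step.
        with torch.no_grad():
            for name, param in agent.named_parameters():
                grad_n = param.grad
                delta = optimizer.step(grad_n, reward, last_obs, model_gradients_history[name])
                param.add_(delta)                      # update the weight in place
                model_gradients_history[name] = grad_n
        agent.zero_grad()  # optional: clear grads if accumulating them across steps is not intended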
@@ -83,18 +80,71 @@ dt_obs = set([d.strftime("%Y-%m-%d") for d in pd.to_datetime(df_data[datetime_co
  # Define dates with missing values
  dt_breaks = [d for d in dt_all.strftime("%Y-%m-%d").tolist() if not d in list(dt_obs)]

-
  df_data_test = df_data[df_data['Date'].dt.year == test_year]
  df_data_train = df_data[df_data['Date'].dt.year != test_year]
+ # ----------------------------------------------------------------------------------------------------------------------
+
+ # ----------------------------------------------------------------------------------------------------------------------
+ # For RL Agent
+ # ----------------------------------------------------------------------------------------------------------------------
+ data = pd.read_csv(f'./data/EURUSD_Candlestick_1_M_BID_01.01.{start_year}-04.02.2023.csv')
+ data = data.tail(50000)
+ data = data.set_index('Local time')
+ date_split = '01.01.2023 16:04:00.000 GMT-0600'
+
+ learning_rate = 0.001
+ first_momentum = 0.0
+ second_momentum = 0.0001
+ transaction_cost = 0.0001
+ adaptation_rate = 0.01
+ state_size = 15
+ equity = 1.0
+
+ train = data[:date_split]
+ test = train.iloc[-(state_size - 1)] + data[date_split:]
+
+ # Initialize agent and optimizer
+ agent = Policy(input_channels=state_size)
+ optimizer = myOptimizer(learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
+
+ history = []
+ for i in range(1, state_size):
+     c = train.iloc[i, :]['Close'] - train.iloc[i - 1, :]['Close']
+     history.append(c)
+
+ # Initialize train and test environments
+ env_train = Environment(train, history=history, state_size=state_size)
+ env_test = Environment(test, history=env_train.history, state_size=state_size)
+
+ model_gradients_history = dict()
+ checkpoint = OrderedDict()
+
+ for name, param in agent.named_parameters():
+     model_gradients_history.update({name: torch.zeros_like(param)})
+
+ pretrain_rl_agent()
+ observations = env_test.reset()
+ # ----------------------------------------------------------------------------------------------------------------------


  def trading_plot():
      global counter
      global df_data_train
+     global observations
+     global profit
+     actions = []

      if counter < len(df_data_test):
          df_data_train = df_data_train.append(df_data_test.iloc[counter])
          counter += 1
+
+         last_observation = observations[-1]
+         for i in range(1440):
+             action, observations = make_prediction(observations)
+             actions.append(action.item())
+         position = statistics.mean(actions)
+         profit += -1.0 * (last_observation - observations[-1]) * position
+
      else:
          df_data_train = df_data

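One line in the new initialization block looks suspect: `test = train.iloc[-(state_size - 1)] + data[date_split:]` selects a single row (a Series), so `+` broadcasts an element-wise addition over `data[date_split:]` rather than joining the two windows. If the intent is to prepend the last `state_size - 1` training rows so that `env_test` starts with a full observation history (an assumption; the commit does not say), the join would look like this sketch:

    import pandas as pd

    # Prepend the last (state_size - 1) training rows to the out-of-sample window so
    # the test environment can build its first observation; assumes `train`, `data`,
    # `date_split` and `state_size` as defined above in app.py.
    test = pd.concat([train.iloc[-(state_size - 1):], data[date_split:]])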
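Each call to the updated trading_plot() advances the test environment by 1440 one-minute predictions (presumably one day of minute bars), averages the per-minute actions into a single position, and then increments `profit` by that position times the change in the final observation value over the day. The same aggregation, factored into a standalone helper for clarity (`run_one_day` is an illustrative name, not part of the commit):

    import statistics

    def run_one_day(observations):
        # Step the agent through 1440 minute bars and average its actions
        # into one daily position in [-1.0, 1.0].
        actions = []
        for _ in range(1440):
            action, observations = make_prediction(observations)
            actions.append(action.item())
        position = statistics.mean(actions)
        return position, observations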
@@ -133,6 +183,7 @@ def trading_plot():
  # The UI of the demo defines here.
  with gr.Blocks() as demo:
      gr.Markdown("Auto trade bot.")
+     gr.Markdown(f"Profit: {profit}")

      # dt = gr.Textbox(label="Current time")
      # demo.queue().load(get_time, inputs=None, outputs=dt, every=1)
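Note that `gr.Markdown(f"Profit: {profit}")` formats `profit` once, when the Blocks UI is built, so updates made later inside trading_plot() will not show up on the page. A sketch of a live readout that reuses the polling pattern already commented out in app.py (`get_profit` and `profit_box` are illustrative names):

    import gradio as gr

    def get_profit():
        # Re-read the module-level profit on every poll.
        return f"Profit: {profit}"

    with gr.Blocks() as demo:
        gr.Markdown("Auto trade bot.")
        profit_box = gr.Textbox(label="Profit")
        # Same pattern as the commented-out current-time example above.
        demo.queue().load(get_profit, inputs=None, outputs=profit_box, every=1)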
 