pmthangk09 committed
Commit 53b9021 · 1 Parent(s): c3e8fb1

Visualize total profits

Files changed (4)
  1. app.py +30 -16
  2. rl_agent/env.py +4 -1
  3. rl_agent/policy.py +0 -9
  4. rl_agent/utils.py +0 -5
app.py CHANGED
@@ -23,6 +23,10 @@ def get_profit():
     return profit
 
 
+# def update_table():
+#     global
+
+
 def pretrain_rl_agent():
     global equity
     observations = env_train.reset()
@@ -31,7 +35,7 @@ def pretrain_rl_agent():
         observations = torch.as_tensor(observations).float()
         action = agent(observations)
         observations, reward, _ = env_train.step(action.data.to("cpu").numpy())
-        reward *= 1e3
+        # reward *= 1e3
 
         action.backward()
 
@@ -88,17 +92,19 @@ dt_breaks = [d for d in dt_all.strftime("%Y-%m-%d").tolist() if not d in list(dt
 
 df_data_test = df_data[df_data['Date'].dt.year == test_year]
 df_data_train = df_data[df_data['Date'].dt.year != test_year]
+
+df_data_train_viz = pd.DataFrame(columns=["Action", "Amount", "Profit"])
 # ----------------------------------------------------------------------------------------------------------------------
 
 # ----------------------------------------------------------------------------------------------------------------------
 # For RL Agent
 # ----------------------------------------------------------------------------------------------------------------------
 data = pd.read_csv(f'./data/EURUSD_Candlestick_1_M_BID_01.01.{start_year}-04.02.2023.csv')
-data = data.tail(50000)
+data = data.head(600000)
 data = data.set_index('Local time')
-date_split = '01.01.2023 16:04:00.000 GMT-0600'
+date_split = '31.01.2022 03:29:00.000 GMT-0600'
 
-learning_rate = 0.01
+learning_rate = 0.001
 first_momentum = 0.0
 second_momentum = 0.0001
 transaction_cost = 0.0001
@@ -107,7 +113,7 @@ state_size = 15
 equity = 1.0
 
 train = data[:date_split]
-test = train.iloc[-(state_size - 1)] + data[date_split:]
+test = pd.concat([train.tail(state_size), data[date_split:]])
 
 # Initialize agent and optimizer
 agent = Policy(input_channels=state_size)
@@ -120,7 +126,13 @@ for i in range(1, state_size):
 
 # Initialize train and test environments
 env_train = Environment(train, history=history, state_size=state_size)
-env_test = Environment(test, history=env_train.history, state_size=state_size)
+
+history = []
+for i in range(1, state_size):
+    c = test.iloc[i, :]['Close'] - test.iloc[i - 1, :]['Close']
+    history.append(c)
+
+env_test = Environment(test, history=history, state_size=state_size)
 
 model_gradients_history = dict()
 checkpoint = OrderedDict()
@@ -149,8 +161,8 @@ def trading_plot():
         action, observations = make_prediction(observations)
         actions.append(action.item())
         position = statistics.mean(actions)
-        profit += -1.0 * (last_observation - observations[-1]) * position
-
+        # profit += -1.0 * (last_observation - observations[-1]) * position
+        profit = env_test.profits
     else:
         df_data_train = df_data
 
@@ -188,8 +200,8 @@ def trading_plot():
 
 # The UI of the demo defines here.
 with gr.Blocks() as demo:
-    gr.Markdown("Auto trade bot.")
-    # gr.Markdown(f"Profit: {profit}")
+    gr.Markdown("Auto AI Trading Bot")
+    gr.Markdown(f"Investment: $100,000")
 
     dt = gr.Textbox(label="Total profit")
     demo.queue().load(get_profit, inputs=None, outputs=dt, every=1)
@@ -200,18 +212,20 @@ with gr.Blocks() as demo:
 
     with gr.Row():
         with gr.Column():
-            gr.Markdown("User Interactive panel.")
+            gr.Markdown("User Interactive panel")
             amount = gr.components.Textbox(value="", label="Amount", interactive=True)
             with gr.Row():
                 buy_btn = gr.components.Button("Buy", label="Buy", interactive=True, inputs=[amount])
                 sell_btn = gr.components.Button("Sell", label="Sell", interactive=True, inputs=[amount])
                 hold_btn = gr.components.Button("Hold", label="Hold", interactive=True, inputs=[amount])
+
         with gr.Column():
-            gr.Markdown("Trade bot history.")
-            df_data_train = pd.DataFrame(columns=["Action", "Amount", "Profit"])
-            trade_bot_table = gr.Dataframe(df_data_train)
-            # show trade box history in a table or something
-            gr.components.Textbox(value="Some history? Need to decide how to show bot history", label="History", interactive=True)
+            gr.Markdown("Trade bot history")
+
+            # trade_bot_table = gr.Dataframe(df_data_train_viz)
+            # demo.queue().load(update_table, inputs=None, outputs=trade_bot_table, every=1)
+            # Show trade box history in a table or something
+            # gr.components.Textbox(value="Some history? Need to decide how to show bot history", label="History", interactive=True)
 
 demo.launch()
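The rewritten train/test split fixes a real bug: the old `train.iloc[-(state_size - 1)] + data[date_split:]` selects a single row and then does element-wise addition, while the new `pd.concat` actually prepends the last `state_size` training candles so the first test state starts with a full window of price deltas. A minimal sketch of that seeding on a toy frame (the values and the positional split are illustrative, not from the repo):

```python
import pandas as pd

# Toy stand-in for the EURUSD candles; only 'Close' matters here.
data = pd.DataFrame({'Close': [1.07, 1.08, 1.06, 1.09, 1.10, 1.12]})
state_size = 3
date_split = 4  # positional split for illustration; app.py splits on a timestamp

train = data[:date_split]
# Prepend the last `state_size` training rows so the test environment
# can observe a full history from its very first step.
test = pd.concat([train.tail(state_size), data[date_split:]])

# Seed env_test's history exactly as the new hunk does: state_size - 1
# close-to-close differences over the prepended rows.
history = []
for i in range(1, state_size):
    c = test.iloc[i, :]['Close'] - test.iloc[i - 1, :]['Close']
    history.append(c)
```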
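The trade-history panel itself is stubbed out for now. A hypothetical `update_table` callback matching the commented-out wiring above (none of this is in the commit; it simply mirrors how `get_profit` already feeds the Total profit textbox every second) could look like:

```python
# Hypothetical completion of the commented-out scaffolding; it mirrors
# the live polling pattern already in app.py:
#   demo.queue().load(get_profit, inputs=None, outputs=dt, every=1)
def update_table():
    # Return the running trade log; Gradio re-renders the Dataframe
    # on every poll tick.
    return df_data_train_viz

# Assumed wiring inside the Blocks context:
#   trade_bot_table = gr.Dataframe(df_data_train_viz)
#   demo.queue().load(update_table, inputs=None, outputs=trade_bot_table, every=1)
```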
 
rl_agent/env.py CHANGED
@@ -21,10 +21,13 @@ class Environment:
         return [self.position_value] + self.history # obs
 
     def step(self, act):
+        # if self.t == len(self.data) - 1:
+        #     return self.history + [0], 0, False
+
         reward = 0
 
         # act = 0: stay, act > 0: buy, act < 0: sell
-        #Additive profits
+        # Additive profits
         cost_amount = np.abs(act-self.position_value)
 
         Zt = self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close']
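The restored `# Additive profits` comment names the reward form used just below the hunk. The exact expression is outside the diff, so this is an assumed reading, but with the names visible here (`Zt`, `cost_amount`, the `act`/`position_value` semantics) the standard additive-profit step is:

```python
import numpy as np

# Assumed additive-profit step, in the usual direct-reinforcement form:
#   Z_t = Close_t - Close_{t-1}                     one-step price change
#   R_t = position_{t-1} * Z_t - c * |a_t - position_{t-1}|
# where c is the per-unit transaction cost and a_t the new position.
def additive_profit_step(close_t, close_prev, position_prev, act, cost=0.0001):
    z_t = close_t - close_prev
    cost_amount = np.abs(act - position_prev)
    return position_prev * z_t - cost * cost_amount

# Holding a long position of 1.0 through a 2-pip rise, then staying put:
# additive_profit_step(1.0702, 1.0700, 1.0, 1.0) -> ~0.0002
```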
rl_agent/policy.py CHANGED
@@ -1,27 +1,18 @@
-import numpy as np
-import torch
 import torch.nn as nn
 
 
 class Policy(nn.Module):
     def __init__(self, input_channels=8):
-
         super(Policy, self).__init__()
-
         self.layer1 = nn.Linear(input_channels, 1)
         self.tanh1 = nn.Tanh()
         # self.layer2 = nn.Linear(2 * input_channels, 1)
         # self.tanh2 = nn.Tanh()
 
     def forward(self, state):
-
         hidden = self.layer1(state)
         hidden = self.tanh1(hidden)
         # hidden = self.layer2(hidden)
         # action = self.tanh2(hidden)
 
         return hidden
-
-
-
-
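After the cleanup the policy is a bare Linear -> Tanh head: it maps a state vector to a single action in (-1, 1), which the environment treats as a position. A quick usage sketch (the 15 comes from `state_size = 15` in app.py; the observation layout from `[self.position_value] + self.history` in env.py):

```python
import torch
from rl_agent.policy import Policy

agent = Policy(input_channels=15)  # state_size in app.py
state = torch.zeros(15)            # [position_value] + 14 close-to-close deltas
action = agent(state)              # 1-element tensor squashed into (-1, 1)
print(action.item())               # act > 0: buy, act < 0: sell, act = 0: stay
```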
 
 
 
 
rl_agent/utils.py CHANGED
@@ -11,16 +11,11 @@ class myOptimizer():
         self.transaction_cost = transaction_cost
 
     def step(self, grad_n, reward, last_observation, last_gradient):
-
         numerator = self.mu_square - (self.mu * reward)
         denominator = np.sqrt((self.mu_square - (self.mu ** 2)) ** 3)
-
         gradient = numerator / denominator
-
         current_grad = (-1.0 * self.transaction_cost * grad_n)
-
         previous_grad = (last_observation + self.transaction_cost) * last_gradient
-
         gradient = torch.as_tensor(gradient) * (current_grad + previous_grad)
 
         return torch.as_tensor(self.lr * gradient)
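For reference, the surviving body of `step` is recognizably the gradient of the differential Sharpe ratio from Moody and Saffell's recurrent reinforcement learning setup, assuming `mu` and `mu_square` are moving estimates of the first and second moments of the reward. A standalone sketch of the first factor:

```python
import numpy as np

# Derivative of the differential Sharpe ratio w.r.t. the latest reward,
# as computed in myOptimizer.step (assumed reading):
#   dS_t/dR_t = (B - A * R_t) / (B - A**2) ** 1.5
# with A ~ EMA of rewards (mu) and B ~ EMA of squared rewards (mu_square).
def dsharpe_dreward(mu, mu_square, reward):
    numerator = mu_square - (mu * reward)
    denominator = np.sqrt((mu_square - (mu ** 2)) ** 3)
    return numerator / denominator

# e.g. dsharpe_dreward(0.001, 0.0001, 0.0002) -> ~101.3; step() then chains
# this with the dR_t/d(action) terms built from grad_n and last_gradient.
```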