Commit · 53b9021
1 parent: c3e8fb1

Visualize total profits

Files changed:
- app.py +30 -16
- rl_agent/env.py +4 -1
- rl_agent/policy.py +0 -9
- rl_agent/utils.py +0 -5
app.py
CHANGED
@@ -23,6 +23,10 @@ def get_profit():
     return profit
 
 
+# def update_table():
+#     global
+
+
 def pretrain_rl_agent():
     global equity
     observations = env_train.reset()
@@ -31,7 +35,7 @@ def pretrain_rl_agent():
         observations = torch.as_tensor(observations).float()
         action = agent(observations)
         observations, reward, _ = env_train.step(action.data.to("cpu").numpy())
-        reward *= 1e3
+        # reward *= 1e3
 
         action.backward()
 
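Note: the update_table stub above lands commented out and unfinished. A minimal sketch of the completion it hints at, assuming the df_data_train_viz frame introduced later in this commit is the intended backing store (hypothetical, not part of the commit):

    # Hypothetical completion of the stub above (not in the commit):
    # hand Gradio the visualization frame so a polling component can re-render it.
    def update_table():
        global df_data_train_viz
        return df_data_train_viz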
@@ -88,17 +92,19 @@ dt_breaks = [d for d in dt_all.strftime("%Y-%m-%d").tolist() if not d in list(dt
 
 df_data_test = df_data[df_data['Date'].dt.year == test_year]
 df_data_train = df_data[df_data['Date'].dt.year != test_year]
+
+df_data_train_viz = pd.DataFrame(columns=["Action", "Amount", "Profit"])
 # ----------------------------------------------------------------------------------------------------------------------
 
 # ----------------------------------------------------------------------------------------------------------------------
 # For RL Agent
 # ----------------------------------------------------------------------------------------------------------------------
 data = pd.read_csv(f'./data/EURUSD_Candlestick_1_M_BID_01.01.{start_year}-04.02.2023.csv')
-data = data.
+data = data.head(600000)
 data = data.set_index('Local time')
-date_split = '
+date_split = '31.01.2022 03:29:00.000 GMT-0600'
 
-learning_rate = 0.
+learning_rate = 0.001
 first_momentum = 0.0
 second_momentum = 0.0001
 transaction_cost = 0.0001
@@ -107,7 +113,7 @@ state_size = 15
 equity = 1.0
 
 train = data[:date_split]
-test = train.
+test = pd.concat([train.tail(state_size), data[date_split:]])
 
 # Initialize agent and optimizer
 agent = Policy(input_channels=state_size)
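Note: the new split deliberately overlaps train and test: pd.concat([train.tail(state_size), data[date_split:]]) prepends the last state_size training rows so the very first test step still has a full close-to-close lookback window. A small alignment sketch (illustrative; state_size is 15 in this commit):

    # By construction, the first state_size rows of test are the tail of train:
    assert (test.index[:state_size] == train.index[-state_size:]).all()
    # Caveat: pandas label slicing is inclusive at both ends, so the candle at
    # date_split sits in both train and data[date_split:] and appears twice in test.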
@@ -120,7 +126,13 @@ for i in range(1, state_size):
 
 # Initialize train and test environments
 env_train = Environment(train, history=history, state_size=state_size)
-env_test = Environment(test, history=history, state_size=state_size)
+
+history = []
+for i in range(1, state_size):
+    c = test.iloc[i, :]['Close'] - test.iloc[i - 1, :]['Close']
+    history.append(c)
+
+env_test = Environment(test, history=history, state_size=state_size)
 
 model_gradients_history = dict()
 checkpoint = OrderedDict()
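Note: the seeding loop added above collects one-step Close differences over the first state_size rows of test, so env_test starts from a fresh history rather than the stale one left over from training. An equivalent vectorized form, as a sketch against the same frame:

    # Same values as the loop: Close[i] - Close[i-1] for i = 1 .. state_size - 1.
    history = test['Close'].diff().iloc[1:state_size].tolist()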
@@ -149,8 +161,8 @@ def trading_plot():
         action, observations = make_prediction(observations)
         actions.append(action.item())
         position = statistics.mean(actions)
-        profit += -1.0 * (last_observation - observations[-1]) * position
-
+        # profit += -1.0 * (last_observation - observations[-1]) * position
+        profit = env_test.profits
     else:
         df_data_train = df_data
 
@@ -188,8 +200,8 @@ def trading_plot():
 
 # The UI of the demo defines here.
 with gr.Blocks() as demo:
-    gr.Markdown("Auto
-
+    gr.Markdown("Auto AI Trading Bot")
+    gr.Markdown(f"Investment: $100,000")
 
     dt = gr.Textbox(label="Total profit")
     demo.queue().load(get_profit, inputs=None, outputs=dt, every=1)
@@ -200,18 +212,20 @@ with gr.Blocks() as demo:
 
     with gr.Row():
         with gr.Column():
-            gr.Markdown("User Interactive panel
+            gr.Markdown("User Interactive panel")
             amount = gr.components.Textbox(value="", label="Amount", interactive=True)
             with gr.Row():
                 buy_btn = gr.components.Button("Buy", label="Buy", interactive=True, inputs=[amount])
                 sell_btn = gr.components.Button("Sell", label="Sell", interactive=True, inputs=[amount])
                 hold_btn = gr.components.Button("Hold", label="Hold", interactive=True, inputs=[amount])
+
         with gr.Column():
-            gr.Markdown("Trade bot history
-
-            trade_bot_table = gr.Dataframe(
-            #
-
+            gr.Markdown("Trade bot history")
+
+            # trade_bot_table = gr.Dataframe(df_data_train_viz)
+            # demo.queue().load(update_table, inputs=None, outputs=trade_bot_table, every=1)
+            # Show trade box history in a table or something
+            # gr.components.Textbox(value="Some history? Need to decide how to show bot history", label="History", interactive=True)
 
 demo.launch()
 
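Note: the trade-history table is left as commented-out scaffolding. A sketch of the wiring those comments point at, reusing the hypothetical update_table from the earlier note and the every=1 polling pattern this commit already uses for the profit textbox:

    with gr.Column():
        gr.Markdown("Trade bot history")
        trade_bot_table = gr.Dataframe(df_data_train_viz)
        # Poll once per second, mirroring the get_profit wiring above.
        demo.queue().load(update_table, inputs=None, outputs=trade_bot_table, every=1)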
rl_agent/env.py
CHANGED
@@ -21,10 +21,13 @@ class Environment:
         return [self.position_value] + self.history # obs
 
     def step(self, act):
+        # if self.t == len(self.data) - 1:
+        #     return self.history + [0], 0, False
+
         reward = 0
 
         # act = 0: stay, act > 0: buy, act < 0: sell
-        #Additive profits
+        # Additive profits
         cost_amount = np.abs(act-self.position_value)
 
         Zt = self.data.iloc[self.t, :]['Close'] - self.data.iloc[(self.t-1), :]['Close']
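Note: the guard added at the top of step() is commented out, but it sketches an end-of-data check. Enabled, it would read as below, assuming step() returns (observation, reward, done) the way the training loop's three-way unpacking implies:

    def step(self, act):
        # Stop once the pointer sits on the last candle: emit a neutral
        # observation and zero reward (sketch; the flag value mirrors the
        # commented-out lines in this commit).
        if self.t == len(self.data) - 1:
            return self.history + [0], 0, False
        ...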
rl_agent/policy.py
CHANGED
@@ -1,27 +1,18 @@
-import numpy as np
-import torch
 import torch.nn as nn
 
 
 class Policy(nn.Module):
     def __init__(self, input_channels=8):
-
         super(Policy, self).__init__()
-
         self.layer1 = nn.Linear(input_channels, 1)
         self.tanh1 = nn.Tanh()
         # self.layer2 = nn.Linear(2 * input_channels, 1)
         # self.tanh2 = nn.Tanh()
 
     def forward(self, state):
-
         hidden = self.layer1(state)
         hidden = self.tanh1(hidden)
         # hidden = self.layer2(hidden)
         # action = self.tanh2(hidden)
 
         return hidden
-
-
-
-
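Note: after this cleanup, Policy is a single Linear layer squashed by Tanh: one forward pass maps a state_size-dimensional observation to a scalar action in (-1, 1), whose sign the environment reads as sell/stay/buy. A minimal usage sketch:

    import torch

    agent = Policy(input_channels=15)   # state_size is 15 in app.py
    obs = torch.randn(15)               # one observation vector
    action = agent(obs)                 # tensor of shape (1,), value in (-1, 1)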
rl_agent/utils.py
CHANGED
@@ -11,16 +11,11 @@ class myOptimizer():
         self.transaction_cost = transaction_cost
 
     def step(self, grad_n, reward, last_observation, last_gradient):
-
         numerator = self.mu_square - (self.mu * reward)
         denominator = np.sqrt((self.mu_square - (self.mu ** 2)) ** 3)
-
         gradient = numerator / denominator
-
         current_grad = (-1.0 * self.transaction_cost * grad_n)
-
         previous_grad = (last_observation + self.transaction_cost) * last_gradient
-
         gradient = torch.as_tensor(gradient) * (current_grad + previous_grad)
 
         return torch.as_tensor(self.lr * gradient)
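Note: myOptimizer.step is a hand-rolled update rather than a torch.optim optimizer. Reading mu and mu_square as running estimates of E[R] and E[R^2], numerator / denominator is the derivative of a Sharpe-style objective with respect to the latest reward, dS/dR = (E[R^2] - E[R]*R) / (E[R^2] - E[R]^2)^(3/2), which step() then chains through the reward's dependence on the action via the transaction-cost terms. A numeric sketch with made-up values:

    import numpy as np
    import torch

    mu, mu_square, reward = 0.01, 0.0004, 0.02   # illustrative running moments
    dS_dR = (mu_square - mu * reward) / np.sqrt((mu_square - mu ** 2) ** 3)

    # Chain rule through the action, exactly as in step():
    transaction_cost, grad_n = 1e-4, 0.5         # illustrative inputs
    last_observation, last_gradient = 1.1, 0.3
    current_grad = -1.0 * transaction_cost * grad_n
    previous_grad = (last_observation + transaction_cost) * last_gradient
    update = torch.as_tensor(dS_dR) * (current_grad + previous_grad)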