Spaces:

ATB
/

AI-trade-bot-demo

Runtime error

App Files Community

PZR0033 committed on Feb 12, 2023

Commit

8e3072c

1 Parent(s): 634b732

final version without initial equity

Browse files

Files changed (3) hide show

rl_agent/env.py +3 -2
rl_agent/test_env.py +29 -8
rl_agent/utils.py +11 -10

rl_agent/env.py CHANGED Viewed

@@ -4,10 +4,11 @@ import torch
 class Environment:
-    def __init__(self, data, history_t=8, history=[0.1, 0.2, -0.1, -0.2, 0., 0.5, 0.9]):
         self.data = data
         self.history = history
         self.history_t = history_t
         self.cost_rate = 0.0001
         self.reset()
@@ -16,7 +17,7 @@ class Environment:
         self.done = False
         self.profits = 0
         self.position_value = 0.
-        self.history = self.history[:7]
         return [self.position_value] + self.history # obs
     def step(self, act):

 class Environment:
+    def __init__(self, data, history_t=8, history=[0.1, 0.2, -0.1, -0.2, 0., 0.5, 0.9], state_size=9):
         self.data = data
         self.history = history
         self.history_t = history_t
+        self.state_size = state_size
         self.cost_rate = 0.0001
         self.reset()
         self.done = False
         self.profits = 0
         self.position_value = 0.
+        self.history = self.history[:self.state_size - 1]
         return [self.position_value] + self.history # obs
     def step(self, act):

rl_agent/test_env.py CHANGED Viewed

@@ -5,6 +5,7 @@ from utils import myOptimizer
 import pandas as pd
 import numpy as np
 import torch
 if __name__ == "__main__":
@@ -16,16 +17,17 @@ if __name__ == "__main__":
     date_split = '19.09.2022 17:55:00.000 GMT-0500'
     train = data[:date_split]
     test = data[date_split:]
-    print(train.head(10))
     learning_rate = 0.01
     first_momentum = 0.0
     second_momentum = 0.0
     transaction_cost = 0.0001
     adaptation_rate = 0.01
-    state_size = 8
-    agent = Policy(input_channels=state_size).float()
     optimizer = myOptimizer(learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
@@ -35,14 +37,33 @@ if __name__ == "__main__":
         c = train.iloc[i, :]['Close'] - train.iloc[i-1, :]['Close']
         history.append(c)
-    env = Environment(train, history=history)
     observation = env.reset()
-    for _ in range(9, 12):
-        print(type(observation))
-        observation = torch.as_tensor(observation)
         action = agent(observation)
         observation, reward, _ = env.step(action.data.numpy())
-    print(env.profits)

 import pandas as pd
 import numpy as np
 import torch
+from collections import OrderedDict
 if __name__ == "__main__":
     date_split = '19.09.2022 17:55:00.000 GMT-0500'
     train = data[:date_split]
     test = data[date_split:]
+    initial_money = 10.0
     learning_rate = 0.01
     first_momentum = 0.0
     second_momentum = 0.0
     transaction_cost = 0.0001
     adaptation_rate = 0.01
+    state_size = 15
+    agent = Policy(input_channels=state_size)
     optimizer = myOptimizer(learning_rate, first_momentum, second_momentum, adaptation_rate, transaction_cost)
         c = train.iloc[i, :]['Close'] - train.iloc[i-1, :]['Close']
         history.append(c)
+    env = Environment(train, history=history, state_size=state_size)
     observation = env.reset()
+    model_gradients_history = dict()
+    checkpoint = OrderedDict()
+    for name, param in agent.named_parameters():
+        model_gradients_history.update({name: torch.zeros_like(param)})
+    for _ in range(state_size, 1440):
+        observation = torch.as_tensor(observation).float()
         action = agent(observation)
         observation, reward, _ = env.step(action.data.numpy())
+        action.backward()
+        for name, param in agent.named_parameters():
+            grad_n = param.grad
+            param = param + optimizer.step(grad_n, reward, observation[-1], model_gradients_history[name])
+            checkpoint[name] = param
+            model_gradients_history.update({name: grad_n})
+        agent.load_state_dict(checkpoint)
+    print(env.profits)

rl_agent/utils.py CHANGED Viewed

@@ -8,27 +8,28 @@ class myOptimizer():
         self.mu = mu
         self.mu_square = mu_square
         self.adaptation_rate = adaptation_rate
-        self.last_gradient = 0.0
         self.transaction_cost = transaction_cost
-    def step(self, grad_n, reward, last_observation):
         numerator = self.mu_square - (self.mu * reward)
-        denominator = torch.sqrt((self.mu_square - (self.mu ** 2)) ** 3)
         gradient = numerator / (denominator + 1e-8)
-        self.mu = self.mu + self.adaptation_rate * (reward - self.mu)
-        self.mu_square = self.mu_square + self.adaptation_rate * ((reward ** 2) - self.mu_square)
         current_grad = (-1.0 * self.transaction_cost * grad_n)
-        previous_grad = (last_observation + self.transaction_cost) * self.last_gradient
-        gradient = gradient * (current_grad + previous_grad)
-        self.last_gradient = grad_n
-        return self.lr * gradient

         self.mu = mu
         self.mu_square = mu_square
         self.adaptation_rate = adaptation_rate
         self.transaction_cost = transaction_cost
+    def step(self, grad_n, reward, last_observation, last_gradient):
         numerator = self.mu_square - (self.mu * reward)
+        denominator = np.sqrt((self.mu_square - (self.mu ** 2)) ** 3)
         gradient = numerator / (denominator + 1e-8)
         current_grad = (-1.0 * self.transaction_cost * grad_n)
+        previous_grad = (last_observation + self.transaction_cost) * last_gradient
+        gradient = torch.as_tensor(gradient) * (current_grad + previous_grad)
+        return torch.as_tensor(self.lr * gradient)
+    def after_step(self, reward):
+        self.mu = self.mu + self.adaptation_rate * (reward - self.mu)
+        self.mu_square = self.mu_square + self.adaptation_rate * ((reward ** 2) - self.mu_square)