File size: 2,259 Bytes
80754e5
 
634b732
80754e5
 
 
8e3072c
80754e5
7bfc81f
80754e5
8e3072c
7bfc81f
80754e5
 
 
 
 
 
7bfc81f
8e3072c
80754e5
 
 
53b9021
 
 
80754e5
 
7bfc81f
53b9021
7bfc81f
80754e5
7bfc81f
 
f7ed643
 
7bfc81f
80754e5
 
7bfc81f
80754e5
7bfc81f
 
634b732
 
80754e5
 
 
 
 
 
 
 
 
 
 
 
 
 
7bfc81f
 
 
 
 
 
80754e5
7bfc81f
80754e5
7bfc81f
80754e5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import numpy as np
import pandas as pd
import torch

class Environment:
    """Step-wise trading environment driven by changes in the ``Close`` column.

    The observation returned by :meth:`reset` and :meth:`step` is a flat list:
    the current position value followed by a rolling window of recent
    ``Close`` differences.

    Attributes:
        data: Tabular price data indexed positionally via ``.iloc``; must
            expose a ``'Close'`` column.
        history_t: Stored as given; not used by any method in this class.
        state_size: Observation length; the history window is cut to
            ``state_size - 1`` entries.
        cost_rate: Cost charged per unit of change in position.
        history: Current rolling window of ``Close`` differences.
        position_value: Position carried into the next step.
        profit / profits: Last step's and cumulative position profit
            (transaction costs are not included in either).
    """

    # Fallback window used when the caller does not supply ``history``.
    _DEFAULT_HISTORY = (0.1, 0.2, -0.1, -0.2, 0., 0.5, 0.9)

    def __init__(self, data, history_t=8, history=None, state_size=9):
        """Store the configuration and put the environment in its start state.

        Args:
            data: Price table with a ``'Close'`` column.
            history_t: Kept on the instance for callers; unused here.
            history: Initial window of price changes. ``None`` selects the
                built-in default window. The sequence is copied, so the
                caller's object is never mutated.
            state_size: Observation length (position + history window).
        """
        self.data = data
        self.history_t = history_t
        self.state_size = state_size
        self.cost_rate = 0.0001
        if history is None:
            history = self._DEFAULT_HISTORY
        # Keep a private copy so reset() can restore the same start window
        # after step() has rolled ``self.history`` forward.
        self._initial_history = list(history)[:state_size - 1]
        self.history = list(self._initial_history)
        self.reset()

    def reset(self):
        """Rewind to ``t = 0`` with a flat position and the initial window.

        Returns:
            The initial observation ``[position_value] + history``.
        """
        self.t = 0
        self.done = False
        self.profit = 0
        self.profits = 0
        self.position_value = 0.
        # Slicing yields a fresh list, so rolling the window in step() never
        # touches the stored initial window.
        self.history = self._initial_history[:self.state_size - 1]
        return [self.position_value] + self.history  # obs

    def _close_change(self, t):
        """Return ``Close`` at row ``t`` minus ``Close`` at row ``t - 1``."""
        close = self.data['Close']
        return close.iloc[t] - close.iloc[t - 1]

    def step(self, act):
        """Apply action ``act`` and advance one row.

        ``act`` is the new position: 0 stays flat, > 0 buys, < 0 sells. The
        reward is the profit of the position held coming into this step minus
        the cost of moving from that position to ``act``.

        Args:
            act: New position. May be a plain number or any object exposing
                ``.item()`` (e.g. a NumPy scalar or a one-element tensor).

        Returns:
            ``(obs, reward, done)`` where ``obs`` is
            ``[position_value] + history`` and ``done`` becomes ``True`` once
            the last row of ``data`` has been reached.

        Raises:
            IndexError: If there is no next row to advance to. Raised before
                any state is modified.
        """
        if self.t + 1 >= len(self.data):
            raise IndexError(
                "step() called with no remaining rows in data; call reset()"
            )

        # There is no previous row at t == 0; use a zero change rather than
        # letting index -1 wrap around to the last row.
        change = self._close_change(self.t) if self.t > 0 else 0.0

        # Additive profits: position profit minus cost of changing position.
        cost_amount = np.abs(act - self.position_value)
        reward = (self.position_value * change) - (self.cost_rate * cost_amount)
        self.profit = self.position_value * change
        self.profits += self.profit

        # Move to the next row and roll the history window forward.
        self.t += 1
        # Unwrap array/tensor scalars; plain Python numbers have no .item().
        self.position_value = act.item() if hasattr(act, 'item') else act
        self.history.pop(0)
        self.history.append(self._close_change(self.t))  # the price being traded

        self.done = self.t >= len(self.data) - 1
        return [self.position_value] + self.history, reward, self.done  # obs, reward, done


if __name__ == "__main__":
    # Smoke run: load the candle data, split it, and take a few random steps.
    data = pd.read_csv('./data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv')
    # The index keeps the raw 'Local time' strings (no datetime parsing), so
    # the min/max printed below are string comparisons, not chronological.
    data = data.set_index('Local time')
    print(data.index.min(), data.index.max())

    date_split = '19.09.2022 17:55:00.000 GMT-0500'
    # NOTE(review): label slices include both endpoints, so the row labelled
    # `date_split` appears in both `train` and `test` — confirm this is intended.
    train = data[:date_split]
    test = data[date_split:]
    print(train.head(10))

    # Seed window: the first eight consecutive Close differences of `train`.
    closes = train['Close']
    history = [closes.iloc[i] - closes.iloc[i - 1] for i in range(1, 9)]

    env = Environment(train, history=history)
    print(env.reset())
    for _ in range(9, 12):
        # np.random.randint(3) yields a built-in int, which has no .item();
        # wrap it as a NumPy scalar so Environment.step can unwrap it.
        pact = np.int64(np.random.randint(3))
        print(env.step(pact)[1])