"""Minimal single-asset trading environment driven by a 'Close' price column."""
import numpy as np
import pandas as pd


class Environment:
    """Step-through trading environment over a price table.

    Actions accepted by :meth:`step`:

    * ``0`` - hold (any value other than 1 or 2 is also treated as hold),
    * ``1`` - buy one unit at the current close,
    * ``2`` - sell every open position at the current close.

    An observation is the list ``[position_value] + history`` where
    ``history`` holds the last ``history_t`` close-to-close differences
    (zero-filled after a reset).
    """

    def __init__(self, data: pd.DataFrame, history_t: int = 90):
        """Store the price table and start a fresh episode.

        Args:
            data: Table indexed positionally through ``.iloc``; it must
                provide a ``'Close'`` column.
            history_t: Number of past close-to-close differences kept in
                the observation.
        """
        self.data = data
        self.history_t = history_t
        self.reset()

    def reset(self) -> list:
        """Rewind to the first row, clear all positions and return the
        initial observation."""
        self.t = 0
        self.done = False
        self.profits = 0
        self.positions = []
        self.position_value = 0
        self.history = [0 for _ in range(self.history_t)]
        return [self.position_value] + self.history  # obs

    def _close(self, t: int):
        """Return the close price of row ``t`` (positional index)."""
        # Read the column first: materialising a whole row Series only to
        # pick one field out of it is needlessly expensive.
        return self.data['Close'].iloc[t]

    def step(self, act: int) -> tuple:
        """Apply ``act`` at the current row, then advance one row.

        Args:
            act: 0 = hold, 1 = buy, 2 = sell all open positions.

        Returns:
            ``(obs, reward, done)``. ``reward`` is clipped to -1, 0 or 1:
            the sign of the profit realised by a sell, or -1 for a sell
            with no open position. ``done`` becomes True once the last
            row of ``data`` has been reached.

        Raises:
            IndexError: if there is no following row to advance to (the
                episode is over); call :meth:`reset` first.
        """
        # Fail before touching any state, so an exhausted episode is not
        # corrupted by a half-applied action.
        if self.t + 1 >= len(self.data):
            raise IndexError(
                "no more data to step through; call reset() to start over"
            )

        reward = 0
        close_now = self._close(self.t)

        if act == 1:  # buy
            self.positions.append(close_now)
        elif act == 2:  # sell
            if not self.positions:
                reward = -1  # penalise selling with nothing to sell
            else:
                profits = sum(close_now - p for p in self.positions)
                reward += profits
                self.profits += profits
                self.positions = []

        # set next time
        self.t += 1
        close_next = self._close(self.t)
        self.position_value = sum(close_next - p for p in self.positions)

        # Slide the window of price differences; an empty window
        # (history_t == 0) simply stays empty.
        if self.history:
            self.history.pop(0)
            self.history.append(close_next - close_now)

        # The episode ends on the last row: there is nothing to step to.
        self.done = self.t >= len(self.data) - 1

        # clipping reward
        if reward > 0:
            reward = 1
        elif reward < 0:
            reward = -1

        return [self.position_value] + self.history, reward, self.done  # obs, reward, done


def main():
    """Load the EURUSD sample, split it at a fixed timestamp and take a few
    random steps in the training environment."""
    data = pd.read_csv('./data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv')
    # NOTE(review): 'Local time' stays a plain string index here, so min()/max()
    # below are lexicographic, not chronological. Parsing it with
    # pd.to_datetime first would fix that - confirm the timestamp format.
    data = data.set_index('Local time')
    print(data.index.min(), data.index.max())

    date_split = '19.09.2022 17:55:00.000 GMT-0500'
    # NOTE(review): label slices include both end points, so the row at
    # date_split ends up in both train and test.
    train = data[:date_split]
    test = data[date_split:]
    print(train.head(10))

    env = Environment(train)
    print(env.reset())
    for _ in range(3):
        act = np.random.randint(3)
        print(env.step(act))


if __name__ == "__main__":
    main()