# Page-header residue from the file viewer this source was copied from (not code):
# uploader: ngthanhtinqn | commit: c5f80c4 ("add env") | size: 2.13 kB
import numpy as np
import pandas as pd
class Environment:
    """Step-by-step trading environment driven by a ``Close`` price column.

    Every call to :meth:`step` consumes one row of ``data`` (addressed by
    position) and moves to the next one. The observation is a flat list:
    the unrealized profit of all open positions followed by the most recent
    ``history_t`` one-step differences of the close price.

    Actions understood by :meth:`step`:
        0 -- stay
        1 -- buy one unit at the current close
        2 -- sell every open position at the current close

    Any other action value is treated like "stay".
    """

    def __init__(self, data: "pd.DataFrame", history_t: int = 90) -> None:
        """Store the price frame and the window length, then reset.

        Args:
            data: DataFrame that has a ``Close`` column.
            history_t: Number of past close-price differences kept in the
                observation.
        """
        self.data = data
        self.history_t = history_t
        self.reset()

    def _close_at(self, t: int):
        """Return the ``Close`` value of the row at position ``t``."""
        return self.data['Close'].iloc[t]

    def reset(self) -> list:
        """Start a new episode at the first row and return the observation.

        Returns:
            ``[position_value] + history`` with every entry set to zero.
        """
        self.t = 0
        # A step always moves to the following row, so an episode over fewer
        # than two rows is finished before it starts.
        self.done = len(self.data) <= 1
        self.profits = 0
        self.positions = []
        self.position_value = 0
        self.history = [0] * self.history_t
        return [self.position_value] + self.history  # obs

    def step(self, act):
        """Apply ``act`` at the current row, then advance one row.

        Args:
            act: 0 = stay, 1 = buy, 2 = sell all open positions.

        Returns:
            Tuple ``(obs, reward, done)``. ``obs`` is
            ``[position_value] + history``. ``reward`` is clipped to -1, 0
            or 1: the sign of the realized profit on a sell, -1 for a sell
            with nothing to sell, 0 otherwise. ``done`` becomes ``True``
            once the last row has been reached.

        Raises:
            IndexError: If called after the episode is done, i.e. there is
                no following row to advance to.
        """
        if self.done:
            # Fail before mutating any state instead of half-applying the
            # action and then running off the end of the data.
            raise IndexError(
                "step() called on a finished episode; call reset() first"
            )

        price = self._close_at(self.t)
        reward = 0

        if act == 1:  # buy
            self.positions.append(price)
        elif act == 2:  # sell
            if not self.positions:
                # Penalize selling when nothing is held.
                reward = -1
            else:
                profits = sum(price - p for p in self.positions)
                reward += profits
                self.profits += profits
                self.positions = []

        # Move to the next row and refresh the observation components.
        self.t += 1
        next_price = self._close_at(self.t)
        self.position_value = sum(next_price - p for p in self.positions)

        # Slide the window; append first so history_t == 0 also works.
        self.history.append(next_price - price)
        if len(self.history) > self.history_t:
            self.history.pop(0)

        # The episode ends when no further row is left to step into.
        self.done = self.t >= len(self.data) - 1

        # Clip the reward to its sign.
        if reward > 0:
            reward = 1
        elif reward < 0:
            reward = -1

        return [self.position_value] + self.history, reward, self.done  # obs, reward, done
if __name__ == "__main__":
    # Smoke test: load the candlestick CSV, split it at a timestamp label and
    # drive the environment with a few random actions.
    data = pd.read_csv(
        './data/EURUSD_Candlestick_1_M_BID_01.01.2021-04.02.2023.csv'
    ).set_index('Local time')
    # The 'Local time' index is left as raw strings (no datetime parsing).
    first_label, last_label = data.index.min(), data.index.max()
    print(first_label, last_label)

    date_split = '19.09.2022 17:55:00.000 GMT-0500'
    # NOTE(review): both slices use the same label, so the boundary row
    # presumably ends up in train and in test -- confirm that is intended.
    train, test = data[:date_split], data[date_split:]
    print(train.head(10))

    env = Environment(train)
    print(env.reset())

    n_random_steps = 3
    for _ in range(n_random_steps):
        # Draw one of the three actions (0, 1 or 2) uniformly at random.
        action = np.random.randint(3)
        print(env.step(action))