|
import numpy as np |
|
import pandas as pd |
|
import gym |
|
from gym.spaces import Box |
|
from collections.abc import Iterable |
|
|
|
from datetime import datetime, timedelta |
|
import pickle |
|
|
|
def my_predict(t, profet_path):
    """Return the pickled time-series model's forecast for ``t`` days from today.

    Args:
        t: Day offset from today's date; converted with ``int(t)``.
        profet_path: Path to a pickle file holding a model whose
            ``predict(frame)`` accepts a DataFrame with a ``'ds'`` column and
            returns a table with a ``'yhat'`` column (presumably a fitted
            Prophet model, judging by the parameter name -- confirm with the
            caller).

    Returns:
        The first ``yhat`` value of the forecast divided by 100.
    """
    # Format the date explicitly. The previous ``str(...)[:11]`` slice kept the
    # space that separates date and time, handing the model 'YYYY-MM-DD '.
    target_day = datetime.today() + timedelta(days=int(t))
    date = target_day.strftime('%Y-%m-%d')

    # NOTE(security): pickle.load executes arbitrary code embedded in the file;
    # only load model files that come from a trusted source.
    with open(profet_path, 'rb') as f:
        m = pickle.load(f)

    future_dates = pd.DataFrame({'ds': [date]})
    forecast = m.predict(future_dates)
    return forecast['yhat'][0] / 100
|
|
|
|
|
def predict_regression(model_path, data):
    """Unpickle the model stored at ``model_path`` and return its first prediction.

    Args:
        model_path: Path to a pickle file containing an object that exposes
            ``predict(data)``.
        data: Input forwarded unchanged to the model's ``predict`` method.

    Returns:
        Element ``0`` of whatever ``predict(data)`` returns.
    """
    with open(model_path, 'rb') as model_file:
        regressor = pickle.load(model_file)

    predictions = regressor.predict(data)
    return predictions[0]
|
|
|
|
|
class GeneralizedRLEnvironment(gym.Env):
    """Gym environment whose state, actions and dynamics come from a config dict.

    Keys of ``input_data`` read by this class:

    * ``['state']['observable_factors']``: list of dicts with ``'name'`` and an
      optional ``'starting_value'``.
    * ``['state']['model_predictions']``: list of dicts with ``'name'``,
      ``'number_of_values_to_derive'``, ``'model_type'`` (``'time_series'`` or
      ``'regression'``), ``'model_path'`` and, for regression models,
      ``'input_variables'``.
    * ``['state']['constant_factors']``: mapping copied into the working data on
      every reset, or ``None``.
    * ``['actions']['RL_boundaries']``: maps a variable name to ``[low, high]``;
      each bound is a number or a string holding a Python expression.
    * ``['actions']['action_space']``: list of dicts with ``'name'``, ``'type'``
      (``'int'``/``'double'``/``'float'`` or ``'list'``) and, for list actions,
      the ``'list'`` of choices.
    * ``['environment']['step']`` and ``['environment']['reward']``: lists of
      Python statements in which ``{name}`` / ``{name[i]}`` refer to entries of
      ``self.my_data``.

    NOTE(security): boundary expressions are passed to ``eval`` and the
    step/reward statements to ``exec``; the configuration must be trusted.
    """

    # Action types that are rescaled from ``action_space_range`` to their bounds.
    _NUMERIC_ACTION_TYPES = ('int', 'double', 'float')

    def __init__(self, input_data):
        """Build the observation/action spaces from ``input_data`` and reset."""
        super().__init__()
        self.my_data = {}
        self.input_data = input_data
        self.T = 100
        self.episode = 1
        self.t = 0
        self.done = False
        self.action_space_range = [-1, 1]

        observations_low = []
        observation_high = []
        for variable in self.input_data['state']['observable_factors']:
            low, high = self._resolve_bounds(variable['name'])
            observations_low.append(low)
            observation_high.append(high)

        # A model prediction contributes one observation slot per derived value.
        for model_predictions in self.input_data['state']['model_predictions']:
            low, high = self._resolve_bounds(model_predictions['name'])
            count = model_predictions['number_of_values_to_derive']
            observations_low.extend([low] * count)
            observation_high.extend([high] * count)

        actions_low = []
        actions_high = []
        for action in self.input_data['actions']['action_space']:
            if action['type'] in self._NUMERIC_ACTION_TYPES:
                actions_low.append(self.action_space_range[0])
                actions_high.append(self.action_space_range[1])
            if action['type'] in ['list']:
                # List actions are sampled in [0, len(list)] and floored to an
                # index inside step().
                actions_low.append(0)
                actions_high.append(len(action['list']))

        self.observation_space = Box(
            low=np.array(observations_low), high=np.array(observation_high))
        self.action_space = Box(
            low=np.array(actions_low), high=np.array(actions_high))
        self.reset()

    def _resolve_bounds(self, name):
        """Return ``(low, high)`` for ``name`` from ``RL_boundaries``.

        String bounds are evaluated as Python expressions; any other value is
        used as given.
        """
        limits = self.input_data['actions']['RL_boundaries'][name]
        # NOTE(security): eval runs arbitrary code from the configuration.
        low = eval(limits[0]) if isinstance(limits[0], str) else limits[0]
        high = eval(limits[1]) if isinstance(limits[1], str) else limits[1]
        return low, high

    def formate_string(self, my_str):
        """Rewrite ``{name}`` placeholders into ``self.my_data['name']`` lookups.

        ``"{price} * 2"`` becomes ``"self.my_data['price'] * 2"`` and
        ``"{demand[0]}"`` becomes ``"self.my_data['demand'][0]"``. A ``{``
        without a matching ``}`` is dropped and the text after it kept as is.

        Args:
            my_str: Statement text containing placeholders.

        Returns:
            The statement with every placeholder replaced.
        """
        head, *segments = my_str.split("{")
        rendered = [head]
        for segment in segments:
            placeholder, closing, remainder = segment.partition("}")
            if not closing:
                rendered.append(segment)
                continue
            # Split the variable name from an optional subscript. Only the text
            # inside the braces is inspected, and everything after the closing
            # brace is kept in full.
            variable, bracket, subscript = placeholder.partition("[")
            rendered.append(
                "self.my_data['" + variable + "']" + bracket + subscript + remainder)
        return "".join(rendered)

    def step(self, action):
        """
        Execute an action in the environment and return the next state, reward, and done flag.

        Args:
            action: Sequence with one entry per configured action variable.

        Returns:
            Tuple ``(next_state, reward, done, info)``. ``info`` is
            ``{"scalled_action": value}`` where ``value`` is the rescaled value
            of the last numeric action, or ``None`` if there is none.
        """
        self.my_data['actions'] = action
        self.my_data['reward'] = 0
        # Stays None when the action space has no numeric action; previously
        # that case raised NameError at the return statement.
        scalled_action = None
        range_low, range_high = self.action_space_range

        for indx, action_var in enumerate(self.input_data['actions']['action_space']):
            if action_var['type'] in self._NUMERIC_ACTION_TYPES:
                low, high = self._resolve_bounds(action_var['name'])
                # Map linearly from action_space_range onto [low, high].
                scalled_action = (
                    (action[indx] - range_low) / (range_high - range_low)) * (high - low) + low
                self.my_data[action_var['name']] = scalled_action
            if action_var['type'] in ['list']:
                choices = action_var['list']
                position = int(np.floor(action[indx] - 0.000000001))
                # Clamp: an action of exactly 0 floors to -1, which used to
                # wrap around and pick the last choice.
                position = min(max(position, 0), len(choices) - 1)
                self.my_data[action_var['name']] = choices[position]

        # NOTE(security): exec runs arbitrary statements from the configuration.
        for calc in self.input_data['environment']['step']:
            exec(self.formate_string(calc))

        for calc in self.input_data['environment']['reward']:
            exec(self.formate_string(calc))

        self.t += 1
        if self.t == self.T - 1:
            self.episode += 1
            self.done = True

        next_state = self.get_next_step()

        return next_state, self.my_data['reward'], self.done, {"scalled_action": scalled_action}

    def reset(self):
        """
        Reset the environment to its initial state.

        Observations are sampled from the observation space; observable factors
        with a ``'starting_value'`` are overwritten with it.

        Returns:
            The initial observation array.
        """
        self.t = 0
        self.done = False
        observations = self.observation_space.sample()

        index = 0
        for variable in self.input_data['state']['observable_factors']:
            if 'starting_value' in variable:
                observations[index] = variable['starting_value']
            self.my_data[variable['name']] = observations[index]
            index += 1

        for variable in self.input_data['state']['model_predictions']:
            count = variable['number_of_values_to_derive']
            if count > 1:
                self.my_data[variable['name']] = [
                    observations[index + offset] for offset in range(count)]
                index += count
            elif count == 1:
                self.my_data[variable['name']] = observations[index]
                # Advance past this slot so the next variable does not reuse it.
                index += 1

        constant_factors = self.input_data['state'].get('constant_factors')
        if constant_factors is not None:
            for key in constant_factors:
                self.my_data[key] = constant_factors[key]

        return observations

    def get_next_step(self):
        """Assemble the next observation from the working data and model outputs.

        Observable factors are read from ``self.my_data``. Each model prediction
        is recomputed, stored in ``self.my_data`` under its name and appended to
        the observation.

        Returns:
            A list when no model prediction contributes values, otherwise a
            NumPy array.
        """
        observations = [
            self.my_data[variable['name']]
            for variable in self.input_data['state']['observable_factors']]

        for variable in self.input_data['state']['model_predictions']:
            count = variable['number_of_values_to_derive']
            if count < 1:
                continue

            if variable['model_type'] == 'time_series':
                if count > 1:
                    self.my_data[variable['name']] = [
                        my_predict(i + self.t, variable['model_path'])
                        for i in range(count)]
                else:
                    # A single value is the forecast for the current step; the
                    # old code referenced an undefined ``i`` here.
                    self.my_data[variable['name']] = my_predict(
                        self.t, variable['model_path'])
                observations = np.hstack(
                    (np.array(observations), np.array(self.my_data[variable['name']])))

            elif variable['model_type'] == 'regression':
                for _ in range(count):
                    # Rebuilt on every pass because my_data may have changed,
                    # e.g. when the model consumes its own previous output.
                    features = []
                    for input_name in variable['input_variables']:
                        value = self.my_data[input_name]
                        features.append(value[0] if isinstance(value, Iterable) else value)
                    # Predict once and reuse the result for both the working
                    # data and the observation.
                    prediction = predict_regression(variable['model_path'], [features])
                    self.my_data[variable['name']] = prediction
                    observations = np.append(observations, prediction)

        return observations