import pickle
from collections.abc import Iterable
from datetime import datetime, timedelta
from functools import lru_cache

import gym
import numpy as np
import pandas as pd
from gym.spaces import Box


@lru_cache(maxsize=32)
def _load_pickled_model(path):
    """Load a pickled model from *path*, caching it for later calls.

    The environment asks for predictions on every step; without the cache
    the same file would be read and unpickled each time.  The cache is keyed
    on the path only, so a file rewritten on disk during the life of the
    process is not picked up again.
    """
    # SECURITY: pickle.load can execute arbitrary code.  Only point the
    # configuration at model files that come from a trusted source.
    with open(path, 'rb') as f:
        return pickle.load(f)


def my_predict(t, profet_path):
    """Return the time-series forecast for the day *t* days from today.

    Args:
        t: Day offset from today's date; converted with ``int``.
        profet_path: Path to a pickled model whose ``predict`` accepts a
            DataFrame with a ``ds`` column and returns a frame with a
            ``yhat`` column (presumably a Prophet model -- confirm).

    Returns:
        The first ``yhat`` value of the forecast divided by 100.
    """
    date = (datetime.today() + timedelta(days=int(t))).strftime('%Y-%m-%d')
    m = _load_pickled_model(profet_path)
    future_dates = pd.DataFrame({'ds': [date]})
    forecast = m.predict(future_dates)
    return forecast['yhat'].iloc[0] / 100


def predict_regression(model_path, data):
    """Return the first prediction of the pickled model for *data*.

    Args:
        model_path: Path to a pickled model exposing ``predict``.
        data: Input passed unchanged to ``model.predict``.

    Returns:
        Element 0 of ``model.predict(data)``.
    """
    model = _load_pickled_model(model_path)
    return model.predict(data)[0]


class GeneralizedRLEnvironment(gym.Env):
    """Gym environment whose dynamics are described by a configuration dict.

    ``input_data`` is read under these keys:

    * ``['state']['observable_factors']`` -- list of dicts with ``name`` and
      an optional ``starting_value``.
    * ``['state']['model_predictions']`` -- list of dicts with ``name``,
      ``number_of_values_to_derive``, ``model_type`` (``'time_series'`` or
      ``'regression'``), ``model_path`` and, for regression,
      ``input_variables``.
    * ``['state']['constant_factors']`` -- optional mapping copied into the
      working data on reset.
    * ``['actions']['action_space']`` -- list of dicts with ``name``,
      ``type`` and, for ``'list'`` actions, ``list``.
    * ``['actions']['RL_boundaries']`` -- mapping ``name -> (low, high)``.
    * ``['environment']['step']`` / ``['environment']['reward']`` -- lists of
      statement templates executed on every step.

    SECURITY: boundary strings are evaluated with ``eval`` and the step and
    reward templates are run with ``exec``.  The configuration must come
    from a trusted source.
    """

    _NUMERIC_TYPES = ('int', 'double', 'float')
    _LIST_TYPES = ('list',)

    def __init__(self, input_data):
        """Build the observation and action spaces and reset the episode."""
        super().__init__()
        self.my_data = {}
        self.input_data = input_data
        self.T = 100
        self.episode = 1
        self.t = 0
        self.done = False
        self.action_space_range = [-1, 1]

        state_cfg = self.input_data['state']
        observations_low = []
        observation_high = []
        for variable in state_cfg['observable_factors']:
            low, high = self._boundaries(variable['name'])
            observations_low.append(low)
            observation_high.append(high)
        # A prediction variable contributes one slot per derived value.
        for model_predictions in state_cfg['model_predictions']:
            low, high = self._boundaries(model_predictions['name'])
            count = model_predictions['number_of_values_to_derive']
            observations_low.extend([low] * count)
            observation_high.extend([high] * count)

        actions_low = []
        actions_high = []
        for action in self.input_data['actions']['action_space']:
            if action['type'] in self._NUMERIC_TYPES:
                # Numeric actions live in the normalised range and are
                # rescaled to their real bounds in ``step``.
                actions_low.append(self.action_space_range[0])
                actions_high.append(self.action_space_range[1])
            if action['type'] in self._LIST_TYPES:
                actions_low.append(0)
                actions_high.append(len(action['list']))

        self.observation_space = Box(
            low=np.array(observations_low), high=np.array(observation_high))
        self.action_space = Box(
            low=np.array(actions_low), high=np.array(actions_high))
        self.reset()

    def _eval_bound(self, raw):
        """Return a boundary value, evaluating it when given as a string."""
        if isinstance(raw, str):
            # SECURITY: eval on configuration text -- trusted input only.
            return eval(raw)
        # Non-string values are used as they are; round-tripping them
        # through str() breaks for values such as float('inf').
        return raw

    def _boundaries(self, name):
        """Return the ``(low, high)`` pair configured for *name*."""
        bounds = self.input_data['actions']['RL_boundaries'][name]
        return self._eval_bound(bounds[0]), self._eval_bound(bounds[1])

    def formate_string(self, my_str):
        """Expand ``{name}`` placeholders into ``self.my_data`` lookups.

        ``{qty}`` becomes ``self.my_data['qty']`` and ``{price[0]}`` becomes
        ``self.my_data['price'][0]``.  Text outside placeholders is copied
        unchanged.

        Args:
            my_str: Statement template.

        Returns:
            The template with every placeholder rewritten.
        """
        pieces = []
        for op in my_str.split("{"):
            if "}" not in op:
                pieces.append(op)
                continue
            # Split on the FIRST closing brace only, so brackets or braces
            # in the trailing text are never mistaken for the placeholder's.
            placeholder, _, tail = op.partition("}")
            name, bracket, subscript = placeholder.partition("[")
            pieces.append(
                "self.my_data['" + name + "']" + bracket + subscript + tail)
        return "".join(pieces)

    def step(self, action):
        """Apply *action*, run the configured dynamics and advance time.

        Args:
            action: Sequence with one entry per configured action, in the
                order of ``['actions']['action_space']``.

        Returns:
            Tuple ``(next_state, reward, done, info)``.  ``info`` holds the
            last rescaled numeric action under ``"scalled_action"``, or
            ``None`` when the action space has no numeric action.
        """
        # NOTE: the templates below are exec'd inside this frame, so the
        # local names of this method are visible to them; they are left
        # unchanged on purpose.
        self.my_data['actions'] = action
        self.my_data['reward'] = 0
        scalled_action = None
        span = self.action_space_range[1] - self.action_space_range[0]
        for indx, action_var in enumerate(self.input_data['actions']['action_space']):
            if action_var['type'] in self._NUMERIC_TYPES:
                low, high = self._boundaries(action_var['name'])
                # Map from the normalised range onto [low, high].
                fraction = (action[indx] - self.action_space_range[0]) / span
                scalled_action = fraction * (high - low) + low
                self.my_data[action_var['name']] = scalled_action
            if action_var['type'] in self._LIST_TYPES:
                choices = action_var['list']
                choice_idx = int(np.floor(action[indx] - 0.000000001))
                # Clamp: an action of exactly 0 would otherwise give -1 and
                # silently select the last entry.
                choice_idx = min(max(choice_idx, 0), len(choices) - 1)
                self.my_data[action_var['name']] = choices[choice_idx]

        # SECURITY: exec on configuration text -- trusted input only.
        for calc in self.input_data['environment']['step']:
            exec(self.formate_string(calc))
        for calc in self.input_data['environment']['reward']:
            exec(self.formate_string(calc))

        self.t += 1
        if self.t == self.T - 1:
            self.episode += 1
            self.done = True

        next_state = self.get_next_step()
        return next_state, self.my_data['reward'], self.done, {"scalled_action": scalled_action}

    def reset(self):
        """Reset the environment to its initial state.

        Observations are sampled from the observation space; observable
        factors with a ``starting_value`` use that value instead.  Constant
        factors are copied into the working data.

        Returns:
            The initial observation vector.
        """
        self.t = 0
        self.done = False
        observations = self.observation_space.sample()
        state_cfg = self.input_data['state']

        index = 0
        for variable in state_cfg['observable_factors']:
            if 'starting_value' in variable:
                observations[index] = variable['starting_value']
            self.my_data[variable['name']] = observations[index]
            index += 1

        for variable in state_cfg['model_predictions']:
            count = variable['number_of_values_to_derive']
            if count > 1:
                self.my_data[variable['name']] = [
                    observations[index + offset] for offset in range(count)]
                index += count
            elif count == 1:
                self.my_data[variable['name']] = observations[index]
                # Advance past this slot so the next variable does not
                # read the same observation.
                index += 1

        constants = state_cfg.get('constant_factors')
        if constants is not None:
            self.my_data.update(constants)
        return observations

    def get_next_step(self):
        """Assemble the next observation from the current working data.

        Observable factors are read from ``self.my_data``; prediction
        variables are recomputed from their models and stored back.

        Returns:
            The observation sequence: a plain list when no prediction
            variable contributed, otherwise a NumPy array.
        """
        state_cfg = self.input_data['state']
        observations = [
            self.my_data[variable['name']]
            for variable in state_cfg['observable_factors']]

        for variable in state_cfg['model_predictions']:
            count = variable['number_of_values_to_derive']
            if count < 1:
                continue
            name = variable['name']
            model_path = variable['model_path']

            if variable['model_type'] == 'time_series':
                forecasts = [
                    my_predict(self.t + offset, model_path)
                    for offset in range(count)]
                # A single derived value is stored as a scalar, forecast
                # for the current time step.
                self.my_data[name] = forecasts if count > 1 else forecasts[0]
                observations = np.hstack(
                    (np.array(observations), np.array(self.my_data[name])))
            elif variable['model_type'] == 'regression':
                for _ in range(count):
                    features = []
                    for input_name in variable['input_variables']:
                        value = self.my_data[input_name]
                        # Sequence-valued inputs contribute their first
                        # element only.
                        if isinstance(value, Iterable):
                            features.append(value[0])
                        else:
                            features.append(value)
                    prediction = predict_regression(model_path, [features])
                    self.my_data[name] = prediction
                    observations = np.append(observations, prediction)
        return observations