# step_reward_generation / rl_environment.py
# HarshSanghavi's picture
# Upload 5 files
# 17a7095 verified
import numpy as np
import pandas as pd
import gym
from gym.spaces import Box
from collections.abc import Iterable
from datetime import datetime, timedelta
import pickle
def my_predict(t, profet_path):
    """Forecast the value ``t`` days from today with a pickled time-series model.

    Args:
        t: Day offset from today; coerced with ``int()``.
        profet_path: Path to a pickle file holding a fitted model. The model
            must expose ``predict(df)`` taking a DataFrame with a ``'ds'``
            column and returning an object with a ``'yhat'`` column
            (presumably a Prophet model, judging by the parameter name --
            confirm against the training code).

    Returns:
        The first ``'yhat'`` value of the forecast, divided by 100.
    """
    # Build a clean ISO date (YYYY-MM-DD). Slicing ``str(datetime)[:11]``
    # kept the separator space after the date.
    target_day = (datetime.today() + timedelta(days=int(t))).date()

    # NOTE(security): pickle.load can execute arbitrary code; only point this
    # at model files from a trusted source.
    with open(profet_path, 'rb') as f:
        m = pickle.load(f)

    future_dates = pd.DataFrame({'ds': [target_day.isoformat()]})
    forecast = m.predict(future_dates)
    return forecast['yhat'][0] / 100
def predict_regression(model_path, data):
    """Load a pickled model and return its first prediction for ``data``.

    Args:
        model_path: Path to a pickle file containing an object that exposes
            ``predict``.
        data: Input handed unchanged to ``model.predict``.

    Returns:
        Element ``0`` of whatever ``model.predict(data)`` returns.
    """
    with open(model_path, 'rb') as handle:
        regressor = pickle.load(handle)
    predictions = regressor.predict(data)
    return predictions[0]
class GeneralizedRLEnvironment(gym.Env):
    """Gym environment whose state, actions and dynamics come from a config dict.

    The ``input_data`` dict is read as follows:

    * ``input_data['state']['observable_factors']``: list of dicts with a
      ``'name'`` and an optional ``'starting_value'``.
    * ``input_data['state']['model_predictions']``: list of dicts with
      ``'name'``, ``'number_of_values_to_derive'``, ``'model_type'``
      (``'time_series'`` or ``'regression'``), ``'model_path'`` and, for
      regression models, ``'input_variables'``.
    * ``input_data['state']['constant_factors']``: dict copied into the
      working data on reset, or ``None``.
    * ``input_data['actions']['action_space']``: list of dicts with ``'name'``
      and ``'type'`` (``'int'``/``'double'``/``'float'`` or ``'list'``;
      ``'list'`` actions also carry the options under ``'list'``).
    * ``input_data['actions']['RL_boundaries']``: maps a variable name to a
      ``[low, high]`` pair.
    * ``input_data['environment']['step']`` and ``['reward']``: lists of
      Python statements in which ``{name}`` / ``{name[i]}`` placeholders refer
      to entries of ``self.my_data``.

    NOTE(security): boundary values are passed through ``eval`` and the
    step/reward statements through ``exec``. The configuration must therefore
    come from a trusted source.
    """

    def __init__(self, input_data):
        """Build the observation/action spaces from ``input_data`` and reset."""
        super().__init__()
        self.my_data = {}  # working variables, addressed by name
        self.input_data = input_data
        self.T = 100  # the episode is flagged done when t reaches T - 1
        self.episode = 1
        self.t = 0
        self.done = False
        self.action_space_range = [-1, 1]  # normalized range of numeric actions

        boundaries = self.input_data['actions']['RL_boundaries']

        observations_low = []
        observation_high = []
        for variable in self.input_data['state']['observable_factors']:
            bounds = boundaries[variable['name']]
            # NOTE(security): eval() lets bounds be written as expression
            # strings, but it runs arbitrary code from the config.
            observations_low.append(eval(str(bounds[0])))
            observation_high.append(eval(str(bounds[1])))
        for model_predictions in self.input_data['state']['model_predictions']:
            bounds = boundaries[model_predictions['name']]
            # One observation slot per derived value, all sharing the bounds.
            for _ in range(model_predictions['number_of_values_to_derive']):
                observations_low.append(eval(str(bounds[0])))
                observation_high.append(eval(str(bounds[1])))

        actions_low = []
        actions_high = []
        for action in self.input_data['actions']['action_space']:
            if action['type'] in ('int', 'double', 'float'):
                # Numeric actions live in the normalized range and are
                # rescaled to their real bounds inside step().
                actions_low.append(self.action_space_range[0])
                actions_high.append(self.action_space_range[1])
            elif action['type'] == 'list':
                # A continuous value in [0, len(options)] that step() turns
                # into an index into the option list.
                actions_low.append(0)
                actions_high.append(len(action['list']))

        self.observation_space = Box(
            low=np.array(observations_low), high=np.array(observation_high))
        self.action_space = Box(
            low=np.array(actions_low), high=np.array(actions_high))
        self.reset()

    def formate_string(self, my_str):
        """Expand ``{name}`` placeholders into ``self.my_data`` lookups.

        ``"{a}"`` becomes ``"self.my_data['a']"`` and ``"{a[0]}"`` becomes
        ``"self.my_data['a'][0]"``. Text outside placeholders is copied
        verbatim, so brackets or closing braces elsewhere in the statement do
        not affect the expansion. Placeholders may be nested, e.g.
        ``"{a[{b}]}"`` gives ``"self.my_data['a'][self.my_data['b']]"``.

        Args:
            my_str: Statement text containing zero or more placeholders.

        Returns:
            The statement with every placeholder expanded.
        """
        # One buffer per currently open "{"; index 0 collects top-level text.
        buffers = [[]]
        for char in my_str:
            if char == "{":
                buffers.append([])
            elif char == "}" and len(buffers) > 1:
                inner = "".join(buffers.pop())
                # Everything before the first "[" is the variable name; the
                # remainder (if any) is a subscript kept as written.
                name, bracket, subscript = inner.partition("[")
                buffers[-1].append(
                    "self.my_data['" + name + "']" + bracket + subscript)
            else:
                # Ordinary text, or a "}" with no matching "{".
                buffers[-1].append(char)
        # An unmatched "{" is dropped and its text kept, as before.
        return "".join("".join(buffer) for buffer in buffers)

    def step(self, action):
        """
        Execute an action in the environment and return the next state, reward, and done flag.

        Args:
            action: Sequence with one entry per configured action, in
                ``action_space`` order.

        Returns:
            Tuple ``(next_state, reward, done, info)``. ``info`` holds
            ``"scalled_action"``: the last rescaled numeric action, or
            ``None`` when the action space has no numeric action.
        """
        # The local names ``action``, ``indx``, ``action_var`` and
        # ``scalled_action`` are kept as-is because the exec'd statements
        # below run with this frame's locals and may reference them.
        self.my_data['actions'] = action
        self.my_data['reward'] = 0

        boundaries = self.input_data['actions']['RL_boundaries']
        range_low, range_high = self.action_space_range
        scalled_action = None
        for indx, action_var in enumerate(self.input_data['actions']['action_space']):
            if action_var['type'] in ('int', 'double', 'float'):
                bound_low = boundaries[action_var['name']][0]
                bound_high = boundaries[action_var['name']][1]
                # Linear map from the normalized range onto the real bounds.
                scalled_action = (
                    (action[indx] - range_low) / (range_high - range_low)
                ) * (bound_high - bound_low) + bound_low
                self.my_data[action_var['name']] = scalled_action
            elif action_var['type'] == 'list':
                options = action_var['list']
                # (k, k + 1] selects option k. The cast to float keeps the
                # tiny nudge from vanishing in float32 arithmetic, and the
                # clamp stops an action of 0 (or an out-of-range value) from
                # wrapping to the wrong end of the list or raising.
                position = int(np.floor(float(action[indx]) - 0.000000001))
                position = min(max(position, 0), len(options) - 1)
                self.my_data[action_var['name']] = options[position]

        # NOTE(security): exec() runs arbitrary code from the configuration.
        for calc in self.input_data['environment']['step']:
            exec(self.formate_string(calc))
        for calc in self.input_data['environment']['reward']:
            exec(self.formate_string(calc))

        self.t += 1
        if self.t == self.T - 1:
            self.episode += 1
            self.done = True

        next_state = self.get_next_step()
        return next_state, self.my_data['reward'], self.done, {"scalled_action": scalled_action}

    def reset(self):
        """
        Reset the environment to its initial state.

        Samples a random observation, overrides observable factors that
        define a ``'starting_value'``, and mirrors every value into
        ``self.my_data`` together with the constant factors.

        Returns:
            The initial observation array.
        """
        self.t = 0
        self.done = False
        observations = self.observation_space.sample()

        index = 0
        for variable in self.input_data['state']['observable_factors']:
            if 'starting_value' in variable:
                observations[index] = variable['starting_value']
            self.my_data[variable['name']] = observations[index]
            index += 1

        for variable in self.input_data['state']['model_predictions']:
            count = variable['number_of_values_to_derive']
            if count > 1:
                self.my_data[variable['name']] = list(
                    observations[index:index + count])
                index += count
            elif count == 1:
                self.my_data[variable['name']] = observations[index]
                # Advance past this slot so the next prediction does not
                # read the same observation entry.
                index += 1

        constants = self.input_data['state'].get('constant_factors')
        if constants is not None:
            for key, value in constants.items():
                self.my_data[key] = value
        return observations

    def _regression_inputs(self, input_names):
        """Collect one feature row for a regression model from ``my_data``."""
        features = []
        for input_name in input_names:
            value = self.my_data[input_name]
            # Multi-valued variables contribute only their first element.
            features.append(value[0] if isinstance(value, Iterable) else value)
        return features

    def get_next_step(self):
        """Assemble the next observation and refresh the model predictions.

        Observable factors are read from ``self.my_data``. Each configured
        model prediction is then recomputed, stored back into
        ``self.my_data`` and appended to the observation.

        Returns:
            The observation: a NumPy array when at least one prediction was
            appended, otherwise the plain list of observable factors.
        """
        observations = [
            self.my_data[variable['name']]
            for variable in self.input_data['state']['observable_factors']
        ]

        for variable in self.input_data['state']['model_predictions']:
            count = variable['number_of_values_to_derive']
            if count < 1:
                continue
            name = variable['name']

            if variable['model_type'] == 'time_series':
                # Forecast days t, t + 1, ..., t + count - 1. A single-value
                # forecast uses offset 0.
                forecasts = [
                    my_predict(offset + self.t, variable['model_path'])
                    for offset in range(count)
                ]
                self.my_data[name] = forecasts if count > 1 else forecasts[0]
                observations = np.hstack(
                    (np.array(observations), np.array(self.my_data[name])))
            elif variable['model_type'] == 'regression':
                for _ in range(count):
                    # Rebuilt on every pass: the inputs may include this
                    # variable's own, just-updated value.
                    features = self._regression_inputs(
                        variable['input_variables'])
                    # Predict once and reuse the value for both the stored
                    # state and the observation.
                    prediction = predict_regression(
                        variable['model_path'], [features])
                    # NOTE(review): this stores a scalar even when count > 1,
                    # while reset() stores a list -- confirm which one the
                    # step/reward statements expect.
                    self.my_data[name] = prediction
                    observations = np.append(observations, prediction)
        return observations