# NOTE: this file was extracted from a hosted Space page; the page metadata
# (runtime status, file size, commit 75ae889, gutter line numbers) was removed.
from sklearn.metrics import mean_absolute_error,mean_squared_error
import numpy as np
import pandas as pd
def create_week_date_featues(df, date_column):
    """Add calendar features derived from *date_column* to *df* (in place) and return it.

    Adds month/day/week/quarter numbers, 0/1 indicator flags for period
    starts/ends and weekend/weekday, a semester number, and days-in-month.

    Parameters
    ----------
    df : pd.DataFrame
        Frame to enrich; modified in place.
    date_column : str
        Name of the column holding dates (anything ``pd.to_datetime`` accepts).

    Returns
    -------
    pd.DataFrame
        The same frame with the feature columns added.
    """
    # Parse once instead of re-parsing the column for every feature.
    dates = pd.to_datetime(df[date_column])
    df['Month'] = dates.dt.month
    df['Day'] = dates.dt.day
    df['Dayofweek'] = dates.dt.dayofweek
    df['DayOfyear'] = dates.dt.dayofyear
    # `.dt.week` was removed in pandas 2.0; isocalendar() is the supported API.
    df['Week'] = dates.dt.isocalendar().week.astype(int)
    df['Quarter'] = dates.dt.quarter
    # BUG FIX: the original np.where(cond, 0, 1) emitted 0 when the condition
    # was TRUE, inverting every Is_* flag relative to its name.
    df['Is_month_start'] = np.where(dates.dt.is_month_start, 1, 0)
    df['Is_month_end'] = np.where(dates.dt.is_month_end, 1, 0)
    df['Is_quarter_start'] = np.where(dates.dt.is_quarter_start, 1, 0)
    df['Is_quarter_end'] = np.where(dates.dt.is_quarter_end, 1, 0)
    df['Is_year_start'] = np.where(dates.dt.is_year_start, 1, 0)
    df['Is_year_end'] = np.where(dates.dt.is_year_end, 1, 0)
    # BUG FIX: the original compared the raw date column against small ints
    # (always False). Semester 1 = quarters 1-2; weekend = Sat/Sun (5/6).
    df['Semester'] = np.where(dates.dt.quarter.isin([1, 2]), 1, 2)
    df['Is_weekend'] = np.where(dates.dt.dayofweek.isin([5, 6]), 1, 0)
    df['Is_weekday'] = np.where(dates.dt.dayofweek.isin([0, 1, 2, 3, 4]), 1, 0)
    df['Days_in_month'] = dates.dt.days_in_month
    return df
def val_prediction(validation, model: object, train_dataset: pd.DataFrame, store_id: str = '1', item_id: str = '1'):
    """Predict the validation horizon for one store/item and print MAE/RMSE.

    Parameters
    ----------
    validation : object
        Validation dataset exposing ``.filter(callable)`` (e.g. a
        pytorch-forecasting TimeSeriesDataSet).
    model : object
        Fitted model exposing ``.predict(...)``.
    train_dataset : pd.DataFrame
        Training frame with 'store', 'item' and 'Lead_1' columns.
    store_id, item_id : str
        Series identifiers to evaluate.

    Returns
    -------
    pd.DataFrame
        Last 30 training rows for the series, with 'prediction', 'y_true'
        and 'x' (decoder time index) columns added.
    """
    # NOTE: the original annotation was `pd.DataFrame()` which instantiated an
    # empty frame at definition time; the class itself is the correct annotation.
    predictions = model.predict(
        validation.filter(lambda x: (x.store == store_id) & (x.item == item_id)),
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )
    filter_train = train_dataset.loc[
        (train_dataset['store'] == store_id) & (train_dataset['item'] == item_id)
    ].reset_index(drop=True)
    # Last 30 rows = decoder horizon; .copy() avoids SettingWithCopyWarning
    # when the new columns are assigned below.
    training_results = filter_train.iloc[-30:, :].copy()
    y_pred = [float(v) for v in predictions.output[0]]
    training_results['prediction'] = y_pred
    training_results['y_true'] = [float(v) for v in predictions.y[0][0]]
    training_results['x'] = [int(v) for v in predictions[1]['decoder_time_idx'][0]]
    # Metrics computed with numpy; identical to sklearn's
    # mean_squared_error / mean_absolute_error.
    actual = np.asarray(training_results['Lead_1'], dtype=float)
    pred = np.asarray(y_pred, dtype=float)
    rmse = np.around(np.sqrt(np.mean((actual - pred) ** 2)), 2)
    mae = np.around(np.mean(np.abs(actual - pred)), 2)
    print(f" VAL DATA = Item ID : {item_id} :: MAE : {mae} :: RMSE : {rmse}")
    return training_results
def test_prediction(model:object,train_dataset,test_dataset,earliest_time,max_encoder_length=120,store_id:str='1',item_id:str='1'):
#encoder data is the last lookback window: we get the last 1 week (168 datapoints) for all 5 consumers = 840 total datapoints
encoder_data = train_dataset[lambda x: x.days_from_start > x.days_from_start.max() - max_encoder_length]
last_data = train_dataset[lambda x: x.days_from_start == x.days_from_start.max()]
# decoder_data = pd.concat(
# [last_data.assign(date=lambda x: x.date + pd.offsets.DateOffset(i)) for i in range(1, 30 + 1)],
# ignore_index=True,
# )
# decoder_data["hours_from_start"] = (decoder_data["date"] - earliest_time).dt.seconds / 60 / 60 + (decoder_data["date"] - earliest_time).dt.days * 24
# decoder_data['hours_from_start'] = decoder_data['hours_from_start'].astype('int')
# decoder_data["hours_from_start"] += encoder_data["hours_from_start"].max() + 1 - decoder_data["hours_from_start"].min()
# # add time index consistent with "data"
# decoder_data["days_from_start"] = (decoder_data["date"] - earliest_time).apply(lambda x:x.days)
# decoder_data=create_week_date_featues(decoder_data,'date')
decoder_data=test_dataset.copy()
new_prediction_data = pd.concat([encoder_data, decoder_data], ignore_index=True)
filter_test=new_prediction_data.loc[(new_prediction_data['store']==store_id) & (new_prediction_data['item']==item_id)]
predictions = model.predict(filter_test,
return_y=True,
return_x=True,
trainer_kwargs=dict(accelerator="cpu"))
# print(filter_test)
testing_results=test_dataset.loc[(test_dataset['store']=='1') & (test_dataset['item']==item_id)]
y=[float(i) for i in predictions.output[0]]
y_true=[float(i) for i in predictions.y[0][0]]
x=[int(i) for i in predictions[1]['decoder_time_idx'][0]]
testing_results['prediction']=y
testing_results['y_true']=y_true
testing_results['x']=x
return testing_results
#-------------------------------------------------------------
def val_pred(model: object, train_dataset, validation, consumer_id: str = 'MT_001'):
    """Predict the validation horizon for one consumer and print MAE/RMSE.

    Parameters
    ----------
    model : object
        Fitted model exposing ``.predict(...)``.
    train_dataset : pd.DataFrame
        Training frame with 'consumer_id' and 'Lead_1' columns.
    validation : object
        Validation dataset exposing ``.filter(callable)``.
    consumer_id : str
        Consumer to evaluate.

    Returns
    -------
    pd.DataFrame
        Last 24 training rows for the consumer, with 'prediction', 'y_true'
        and 'x' (decoder time index) columns added.
    """
    predictions = model.predict(
        validation.filter(lambda x: (x.consumer_id == consumer_id)),
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )
    filter_train = train_dataset.loc[train_dataset['consumer_id'] == consumer_id].reset_index(drop=True)
    # Last 24 rows = validation horizon; .copy() avoids SettingWithCopyWarning
    # when the new columns are assigned below.
    val_results = filter_train.iloc[-24:, :].copy()
    y_pred = [float(v) for v in predictions.output[0]]
    val_results['prediction'] = y_pred
    val_results['y_true'] = [float(v) for v in predictions.y[0][0]]
    val_results['x'] = [int(v) for v in predictions[1]['decoder_time_idx'][0]]
    # Metrics computed with numpy; identical to sklearn's
    # mean_squared_error / mean_absolute_error.
    actual = np.asarray(val_results['Lead_1'], dtype=float)
    pred = np.asarray(y_pred, dtype=float)
    rmse = np.around(np.sqrt(np.mean((actual - pred) ** 2)), 2)
    mae = np.around(np.mean(np.abs(actual - pred)), 2)
    print(f" VAL DATA = Consumer ID : {consumer_id} :: MAE : {mae} :: RMSE : {rmse}")
    return val_results
def test_pred(model:object,train_dataset,test_dataset,consumer_id:str='MT_001',max_encoder_length:int=168):
encoder_data = train_dataset[lambda x: x.hours_from_start > x.hours_from_start.max() - max_encoder_length]
last_data = train_dataset[lambda x: x.hours_from_start == x.hours_from_start.max()]
decoder_data=test_dataset.copy()
new_prediction_data = pd.concat([encoder_data, decoder_data], ignore_index=True)
filter_train=new_prediction_data.loc[ (new_prediction_data['consumer_id']==consumer_id)]
predictions = model.predict(filter_train,
return_y=True,
return_x=True,
trainer_kwargs=dict(accelerator="cpu"))
# print(filter_train)
testing_results=test_dataset.loc[(test_dataset['consumer_id']==consumer_id)]
y=[float(i) for i in predictions.output[0]]
y_true=[float(i) for i in predictions.y[0][0]]
x=[int(i) for i in predictions[1]['decoder_time_idx'][0]]
testing_results['prediction']=y
testing_results['y_true']=y_true
testing_results['x']=x
rmse=np.around(np.sqrt(mean_squared_error(testing_results['Lead_1'],y)),2)
mae=np.around(mean_absolute_error(testing_results['Lead_1'],y),2)
print(f"TEST DATA = Consumer ID : {consumer_id} :: MAE : {mae} :: RMSE : {rmse}")
return testing_results