|
from sklearn.metrics import mean_absolute_error,mean_squared_error |
|
import numpy as np |
|
import pandas as pd |
|
|
|
def create_week_date_featues(df, date_column):
    """Add calendar-derived feature columns to ``df``.

    ``df[date_column]`` is parsed once with ``pd.to_datetime`` and the
    following columns are written to ``df`` (existing columns with the same
    names are overwritten):

    ``Month``, ``Day``, ``Dayofweek`` (Monday=0), ``DayOfyear``, ``Week``
    (ISO week number), ``Quarter``, ``Is_month_start``, ``Is_month_end``,
    ``Is_quarter_start``, ``Is_quarter_end``, ``Is_year_start``,
    ``Is_year_end`` (1 when the property holds, else 0), ``Semester``
    (1 for quarters 1-2, else 2), ``Is_weekend`` (1 for Saturday/Sunday),
    ``Is_weekday`` (1 for Monday-Friday) and ``Days_in_month``.

    Args:
        df: DataFrame to enrich. It is modified in place.
        date_column: Name of the column holding the dates; any values
            accepted by ``pd.to_datetime``.

    Returns:
        The same ``df`` object, with the feature columns added.
    """
    # Parse once up front instead of re-parsing the column for every feature.
    dates = pd.to_datetime(df[date_column]).dt

    df['Month'] = dates.month
    df['Day'] = dates.day
    df['Dayofweek'] = dates.dayofweek
    df['DayOfyear'] = dates.dayofyear

    # ``Series.dt.week`` was removed in pandas 2.0; ``isocalendar().week`` is
    # its replacement. It yields a nullable UInt32 column, so convert back to
    # a plain NumPy dtype (float only when missing dates force NaN).
    iso_week = dates.isocalendar().week
    df['Week'] = iso_week.astype('float64' if iso_week.isna().any() else 'int64')

    df['Quarter'] = dates.quarter

    # Boolean calendar properties encoded as 1 (true) / 0 (false).
    boundary_flags = (
        ('Is_month_start', 'is_month_start'),
        ('Is_month_end', 'is_month_end'),
        ('Is_quarter_start', 'is_quarter_start'),
        ('Is_quarter_end', 'is_quarter_end'),
        ('Is_year_start', 'is_year_start'),
        ('Is_year_end', 'is_year_end'),
    )
    for column, attribute in boundary_flags:
        df[column] = np.where(getattr(dates, attribute), 1, 0)

    # These features derive from the computed Quarter / Dayofweek columns,
    # not from the raw date values.
    df['Semester'] = np.where(df['Quarter'].isin([1, 2]), 1, 2)
    df['Is_weekend'] = np.where(df['Dayofweek'].isin([5, 6]), 1, 0)
    df['Is_weekday'] = np.where(df['Dayofweek'].isin([0, 1, 2, 3, 4]), 1, 0)

    df['Days_in_month'] = dates.days_in_month

    return df
|
|
|
def val_prediction(validation, model: object, train_dataset: pd.DataFrame, store_id: str = '1', item_id: str = '1'):
    """Predict one store/item series from ``validation`` and report its error.

    The validation dataset is filtered to the requested store and item, the
    model is run on CPU, and the first returned sample is attached to the
    trailing rows of the matching series in ``train_dataset``. MAE and RMSE
    of the prediction against the ``Lead_1`` column are printed.

    Args:
        validation: Dataset object exposing ``filter(callable)``; the callable
            receives an object with ``store`` and ``item`` attributes.
            NOTE(review): presumably a pytorch-forecasting ``TimeSeriesDataSet``
            - confirm against the caller.
        model: Object exposing ``predict(data, return_y=..., return_x=...,
            trainer_kwargs=...)``.
        train_dataset: Frame with ``store``, ``item`` and ``Lead_1`` columns.
        store_id: Store identifier to select.
        item_id: Item identifier to select.

    Returns:
        A new DataFrame holding the last ``len(prediction)`` rows of the
        selected series plus ``prediction``, ``y_true`` and ``x`` columns.
        ``train_dataset`` itself is not modified.
    """
    predictions = model.predict(
        validation.filter(lambda x: (x.store == store_id) & (x.item == item_id)),
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )

    # Only the first sample of the returned batch is used.
    y = [float(i) for i in predictions.output[0]]
    y_true = [float(i) for i in predictions.y[0][0]]
    x = [int(i) for i in predictions[1]['decoder_time_idx'][0]]

    series_mask = (train_dataset['store'] == store_id) & (train_dataset['item'] == item_id)
    filter_train = train_dataset.loc[series_mask].reset_index(drop=True)

    # Take as many trailing rows as there are predicted steps (previously a
    # hard-coded 30) and copy them so the column assignments below write to
    # an independent frame rather than to a slice of ``filter_train``.
    training_results = filter_train.iloc[-len(y):, :].copy()
    training_results['prediction'] = y
    training_results['y_true'] = y_true
    training_results['x'] = x

    rmse = np.around(np.sqrt(mean_squared_error(training_results['Lead_1'], y)), 2)
    mae = np.around(mean_absolute_error(training_results['Lead_1'], y), 2)
    print(f" VAL DATA = Item ID : {item_id} :: MAE : {mae} :: RMSE : {rmse}")

    return training_results
|
|
|
def test_prediction(model: object, train_dataset, test_dataset, earliest_time, max_encoder_length=120, store_id: str = '1', item_id: str = '1'):
    """Predict the test horizon for one store/item series.

    The last ``max_encoder_length`` days of ``train_dataset`` (selected via
    its ``days_from_start`` column) are concatenated with ``test_dataset``,
    restricted to the requested store and item, and passed to the model on
    CPU. The first returned sample is attached to that series' test rows.

    Args:
        model: Object exposing ``predict(data, return_y=..., return_x=...,
            trainer_kwargs=...)``.
        train_dataset: Frame with ``days_from_start``, ``store`` and ``item``
            columns, used as encoder history.
        test_dataset: Frame with ``store`` and ``item`` columns covering the
            horizon to predict.
        earliest_time: Unused; kept so existing callers keep working.
        max_encoder_length: Number of trailing ``days_from_start`` values of
            history to feed the model.
        store_id: Store identifier to select.
        item_id: Item identifier to select.

    Returns:
        A new DataFrame with the selected rows of ``test_dataset`` plus
        ``prediction``, ``y_true`` and ``x`` columns. ``test_dataset`` itself
        is not modified.
    """
    encoder_data = train_dataset[lambda x: x.days_from_start > x.days_from_start.max() - max_encoder_length]

    # ``pd.concat`` already builds a new frame, so no defensive copy of
    # ``test_dataset`` is needed before concatenating.
    new_prediction_data = pd.concat([encoder_data, test_dataset], ignore_index=True)
    series_mask = (new_prediction_data['store'] == store_id) & (new_prediction_data['item'] == item_id)
    filter_test = new_prediction_data.loc[series_mask]

    predictions = model.predict(
        filter_test,
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )

    # Select the result rows with the requested ``store_id`` (this was
    # hard-coded to store '1', mismatching the rows sent to the model), and
    # copy so the assignments below do not target a slice of ``test_dataset``.
    result_mask = (test_dataset['store'] == store_id) & (test_dataset['item'] == item_id)
    testing_results = test_dataset.loc[result_mask].copy()

    # Only the first sample of the returned batch is used.
    testing_results['prediction'] = [float(i) for i in predictions.output[0]]
    testing_results['y_true'] = [float(i) for i in predictions.y[0][0]]
    testing_results['x'] = [int(i) for i in predictions[1]['decoder_time_idx'][0]]

    return testing_results
|
|
|
|
|
|
|
|
|
|
|
def val_pred(model: object, train_dataset, validation, consumer_id: str = 'MT_001'):
    """Predict one consumer's series from ``validation`` and report its error.

    The validation dataset is filtered to ``consumer_id``, the model is run
    on CPU, and the first returned sample is attached to the trailing rows of
    that consumer's series in ``train_dataset``. MAE and RMSE of the
    prediction against the ``Lead_1`` column are printed.

    Args:
        model: Object exposing ``predict(data, return_y=..., return_x=...,
            trainer_kwargs=...)``.
        train_dataset: Frame with ``consumer_id`` and ``Lead_1`` columns.
        validation: Dataset object exposing ``filter(callable)``; the callable
            receives an object with a ``consumer_id`` attribute.
            NOTE(review): presumably a pytorch-forecasting ``TimeSeriesDataSet``
            - confirm against the caller.
        consumer_id: Consumer identifier to select.

    Returns:
        A new DataFrame holding the last ``len(prediction)`` rows of the
        consumer's series plus ``prediction``, ``y_true`` and ``x`` columns.
        ``train_dataset`` itself is not modified.
    """
    predictions = model.predict(
        validation.filter(lambda x: (x.consumer_id == consumer_id)),
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )

    # Only the first sample of the returned batch is used.
    y = [float(i) for i in predictions.output[0]]
    y_true = [float(i) for i in predictions.y[0][0]]
    x = [int(i) for i in predictions[1]['decoder_time_idx'][0]]

    filter_train = train_dataset.loc[(train_dataset['consumer_id'] == consumer_id)].reset_index(drop=True)

    # Take as many trailing rows as there are predicted steps (previously a
    # hard-coded 24) and copy them so the column assignments below write to
    # an independent frame rather than to a slice of ``filter_train``.
    val_results = filter_train.iloc[-len(y):, :].copy()
    val_results['prediction'] = y
    val_results['y_true'] = y_true
    val_results['x'] = x

    rmse = np.around(np.sqrt(mean_squared_error(val_results['Lead_1'], y)), 2)
    mae = np.around(mean_absolute_error(val_results['Lead_1'], y), 2)
    print(f" VAL DATA = Consumer ID : {consumer_id} :: MAE : {mae} :: RMSE : {rmse}")

    return val_results
|
|
|
def test_pred(model: object, train_dataset, test_dataset, consumer_id: str = 'MT_001', max_encoder_length: int = 168):
    """Predict the test horizon for one consumer and report its error.

    The last ``max_encoder_length`` hours of ``train_dataset`` (selected via
    its ``hours_from_start`` column) are concatenated with ``test_dataset``,
    restricted to ``consumer_id``, and passed to the model on CPU. The first
    returned sample is attached to that consumer's test rows, and MAE and
    RMSE against the ``Lead_1`` column are printed.

    Args:
        model: Object exposing ``predict(data, return_y=..., return_x=...,
            trainer_kwargs=...)``.
        train_dataset: Frame with ``hours_from_start`` and ``consumer_id``
            columns, used as encoder history.
        test_dataset: Frame with ``consumer_id`` and ``Lead_1`` columns
            covering the horizon to predict.
        consumer_id: Consumer identifier to select.
        max_encoder_length: Number of trailing ``hours_from_start`` values of
            history to feed the model.

    Returns:
        A new DataFrame with the consumer's rows of ``test_dataset`` plus
        ``prediction``, ``y_true`` and ``x`` columns. ``test_dataset`` itself
        is not modified.
    """
    encoder_data = train_dataset[lambda x: x.hours_from_start > x.hours_from_start.max() - max_encoder_length]

    # ``pd.concat`` already builds a new frame, so no defensive copy of
    # ``test_dataset`` is needed before concatenating.
    new_prediction_data = pd.concat([encoder_data, test_dataset], ignore_index=True)
    consumer_history = new_prediction_data.loc[(new_prediction_data['consumer_id'] == consumer_id)]

    predictions = model.predict(
        consumer_history,
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )

    # Copy so the assignments below do not target a slice of ``test_dataset``.
    testing_results = test_dataset.loc[(test_dataset['consumer_id'] == consumer_id)].copy()

    # Only the first sample of the returned batch is used.
    y = [float(i) for i in predictions.output[0]]
    y_true = [float(i) for i in predictions.y[0][0]]
    x = [int(i) for i in predictions[1]['decoder_time_idx'][0]]

    testing_results['prediction'] = y
    testing_results['y_true'] = y_true
    testing_results['x'] = x

    rmse = np.around(np.sqrt(mean_squared_error(testing_results['Lead_1'], y)), 2)
    mae = np.around(mean_absolute_error(testing_results['Lead_1'], y), 2)
    print(f"TEST DATA = Consumer ID : {consumer_id} :: MAE : {mae} :: RMSE : {rmse}")

    return testing_results
|
|
|
|
|
|
|
|
|
|
|
|