File size: 7,137 Bytes
75ae889
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
from sklearn.metrics import mean_absolute_error,mean_squared_error
import numpy as np
import pandas as pd

def create_week_date_featues(df,date_column):
    """Add calendar features derived from ``df[date_column]`` to ``df``.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame holding the date column.
        date_column: Name of a column that ``pd.to_datetime`` can parse.

    Returns:
        The same ``df`` object with these columns added: ``Month``, ``Day``,
        ``Dayofweek`` (Monday=0), ``DayOfyear``, ``Week`` (ISO week number),
        ``Quarter``, ``Is_month_start``, ``Is_month_end``,
        ``Is_quarter_start``, ``Is_quarter_end``, ``Is_year_start``,
        ``Is_year_end``, ``Semester`` (1 for quarters 1-2, otherwise 2),
        ``Is_weekend`` (1 for Saturday/Sunday), ``Is_weekday`` (1 for
        Monday-Friday) and ``Days_in_month``.
    """
    # Parse once; every feature below reads from the same accessor.
    dates = pd.to_datetime(df[date_column]).dt

    df['Month'] = dates.month
    df['Day'] = dates.day
    df['Dayofweek'] = dates.dayofweek
    df['DayOfyear'] = dates.dayofyear

    # ``Series.dt.week`` was removed in pandas 2.0; ``isocalendar().week`` is
    # its replacement. It yields a nullable UInt32 column, so cast back to a
    # plain numpy dtype (float only when unparseable dates produced NA).
    iso_week = dates.isocalendar().week
    df['Week'] = iso_week.astype('float64' if iso_week.isna().any() else 'int64')

    df['Quarter'] = dates.quarter

    # NOTE(review): these six flags are encoded 0 = condition holds,
    # 1 = otherwise, the opposite of Is_weekend / Is_weekday below. The
    # encoding is kept unchanged because existing consumers may already
    # depend on it; confirm whether the inversion is intentional.
    df['Is_month_start'] = np.where(dates.is_month_start, 0, 1)
    df['Is_month_end'] = np.where(dates.is_month_end, 0, 1)
    df['Is_quarter_start'] = np.where(dates.is_quarter_start, 0, 1)
    df['Is_quarter_end'] = np.where(dates.is_quarter_end, 0, 1)
    df['Is_year_start'] = np.where(dates.is_year_start, 0, 1)
    df['Is_year_end'] = np.where(dates.is_year_end, 0, 1)

    # These three are derived from the quarter / day of week. Testing the raw
    # date column against small integers never matches, which would make the
    # columns constant.
    df['Semester'] = np.where(dates.quarter.isin([1, 2]), 1, 2)
    df['Is_weekend'] = np.where(dates.dayofweek.isin([5, 6]), 1, 0)
    df['Is_weekday'] = np.where(dates.dayofweek.isin([0, 1, 2, 3, 4]), 1, 0)

    df['Days_in_month'] = dates.days_in_month

    return df

def val_prediction(validation,model:object,train_dataset:pd.DataFrame,store_id:str='1',item_id:str='1',prediction_length:int=30):
    """Predict one store/item series from the validation set and print MAE/RMSE.

    Args:
        validation: Dataset object exposing ``filter(callable)``; it is
            filtered to rows whose ``store`` and ``item`` equal the given ids.
            (Presumably a pytorch-forecasting ``TimeSeriesDataSet`` -- confirm
            against the caller.)
        model: Object whose ``predict`` accepts ``return_y``, ``return_x`` and
            ``trainer_kwargs`` and returns a result exposing ``output``, ``y``
            and, at position 1, a mapping containing ``'decoder_time_idx'``.
        train_dataset: DataFrame with ``store``, ``item`` and ``Lead_1``
            columns.
        store_id: Store identifier to select.
        item_id: Item identifier to select.
        prediction_length: Number of trailing rows of the selected series that
            are paired with the predictions. Must be positive and equal to the
            number of predicted steps. Defaults to 30.

    Returns:
        A copy of the last ``prediction_length`` rows of the selected series
        with ``prediction``, ``y_true`` and ``x`` columns added.
    """
    predictions = model.predict(
        validation.filter(lambda x: (x.store == store_id) & (x.item == item_id)),
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )

    series_mask = (train_dataset['store'] == store_id) & (train_dataset['item'] == item_id)
    filter_train = train_dataset.loc[series_mask].reset_index(drop=True)

    # Explicit copy: new columns are added below, and writing to a bare slice
    # triggers pandas' SettingWithCopyWarning.
    training_results = filter_train.iloc[-prediction_length:, :].copy()

    # Only the first series of the prediction batch is used.
    y = [float(i) for i in predictions.output[0]]
    y_true = [float(i) for i in predictions.y[0][0]]
    x = [int(i) for i in predictions[1]['decoder_time_idx'][0]]

    training_results['prediction'] = y
    training_results['y_true'] = y_true
    training_results['x'] = x

    # Metrics are computed against the frame's own ``Lead_1`` column.
    rmse = np.around(np.sqrt(mean_squared_error(training_results['Lead_1'], y)), 2)
    mae = np.around(mean_absolute_error(training_results['Lead_1'], y), 2)
    print(f" VAL DATA  = Item ID : {item_id} :: MAE : {mae} :: RMSE : {rmse}")
    return training_results

def test_prediction(model:object,train_dataset,test_dataset,earliest_time,max_encoder_length=120,store_id:str='1',item_id:str='1'):
    """Predict the test horizon for one store/item series.

    The last ``max_encoder_length`` values of ``days_from_start`` in
    ``train_dataset`` are used as encoder rows and ``test_dataset`` as decoder
    rows; both are concatenated, filtered to the requested store/item and
    passed to ``model.predict``.

    Args:
        model: Object whose ``predict`` accepts a DataFrame plus ``return_y``,
            ``return_x`` and ``trainer_kwargs`` and returns a result exposing
            ``output``, ``y`` and, at position 1, a mapping containing
            ``'decoder_time_idx'``.
        train_dataset: DataFrame with ``days_from_start``, ``store`` and
            ``item`` columns.
        test_dataset: DataFrame with ``store`` and ``item`` columns; it is not
            modified.
        earliest_time: Unused; kept so existing callers keep working.
        max_encoder_length: Size of the lookback window, in units of
            ``days_from_start``.
        store_id: Store identifier to select.
        item_id: Item identifier to select.

    Returns:
        A copy of the ``test_dataset`` rows for the requested store/item with
        ``prediction``, ``y_true`` and ``x`` columns added.
    """
    # Encoder rows: the trailing lookback window of the training data.
    latest_day = train_dataset['days_from_start'].max()
    encoder_data = train_dataset[train_dataset['days_from_start'] > latest_day - max_encoder_length]
    decoder_data = test_dataset.copy()

    new_prediction_data = pd.concat([encoder_data, decoder_data], ignore_index=True)
    series_mask = (new_prediction_data['store'] == store_id) & (new_prediction_data['item'] == item_id)
    filter_test = new_prediction_data.loc[series_mask]
    predictions = model.predict(
        filter_test,
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )

    # Select the same store that was predicted (use store_id, not a fixed
    # '1'), and copy so the new columns do not touch the caller's frame.
    result_mask = (test_dataset['store'] == store_id) & (test_dataset['item'] == item_id)
    testing_results = test_dataset.loc[result_mask].copy()

    # Only the first series of the prediction batch is used.
    y = [float(i) for i in predictions.output[0]]
    y_true = [float(i) for i in predictions.y[0][0]]
    x = [int(i) for i in predictions[1]['decoder_time_idx'][0]]

    testing_results['prediction'] = y
    testing_results['y_true'] = y_true
    testing_results['x'] = x
    return testing_results



#-------------------------------------------------------------

def val_pred(model:object,train_dataset,validation,consumer_id:str='MT_001',prediction_length:int=24):
    """Predict one consumer's series from the validation set and print MAE/RMSE.

    Args:
        model: Object whose ``predict`` accepts ``return_y``, ``return_x`` and
            ``trainer_kwargs`` and returns a result exposing ``output``, ``y``
            and, at position 1, a mapping containing ``'decoder_time_idx'``.
        train_dataset: DataFrame with ``consumer_id`` and ``Lead_1`` columns.
        validation: Dataset object exposing ``filter(callable)``; it is
            filtered to rows whose ``consumer_id`` equals the given id.
            (Presumably a pytorch-forecasting ``TimeSeriesDataSet`` -- confirm
            against the caller.)
        consumer_id: Consumer identifier to select.
        prediction_length: Number of trailing rows of the selected series that
            are paired with the predictions. Must be positive and equal to the
            number of predicted steps. Defaults to 24.

    Returns:
        A copy of the last ``prediction_length`` rows of the selected series
        with ``prediction``, ``y_true`` and ``x`` columns added.
    """
    predictions = model.predict(
        validation.filter(lambda x: (x.consumer_id == consumer_id)),
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )

    filter_train = train_dataset.loc[train_dataset['consumer_id'] == consumer_id].reset_index(drop=True)

    # Explicit copy: new columns are added below, and writing to a bare slice
    # triggers pandas' SettingWithCopyWarning.
    val_results = filter_train.iloc[-prediction_length:, :].copy()

    # Only the first series of the prediction batch is used.
    y = [float(i) for i in predictions.output[0]]                   # prediction
    y_true = [float(i) for i in predictions.y[0][0]]                # actual
    x = [int(i) for i in predictions[1]['decoder_time_idx'][0]]     # time idx

    val_results['prediction'] = y
    val_results['y_true'] = y_true
    val_results['x'] = x

    # Metrics are computed against the frame's own ``Lead_1`` column.
    rmse = np.around(np.sqrt(mean_squared_error(val_results['Lead_1'], y)), 2)
    mae = np.around(mean_absolute_error(val_results['Lead_1'], y), 2)

    print(f" VAL DATA  = Consumer ID : {consumer_id} :: MAE : {mae} :: RMSE : {rmse}")
    return val_results

def test_pred(model:object,train_dataset,test_dataset,consumer_id:str='MT_001',max_encoder_length:int=168):
    """Predict the test horizon for one consumer and print MAE/RMSE.

    The last ``max_encoder_length`` values of ``hours_from_start`` in
    ``train_dataset`` are used as encoder rows and ``test_dataset`` as decoder
    rows; both are concatenated, filtered to the requested consumer and passed
    to ``model.predict``.

    Args:
        model: Object whose ``predict`` accepts a DataFrame plus ``return_y``,
            ``return_x`` and ``trainer_kwargs`` and returns a result exposing
            ``output``, ``y`` and, at position 1, a mapping containing
            ``'decoder_time_idx'``.
        train_dataset: DataFrame with ``hours_from_start`` and ``consumer_id``
            columns.
        test_dataset: DataFrame with ``consumer_id`` and ``Lead_1`` columns;
            it is not modified.
        consumer_id: Consumer identifier to select.
        max_encoder_length: Size of the lookback window, in units of
            ``hours_from_start``.

    Returns:
        A copy of the ``test_dataset`` rows for the requested consumer with
        ``prediction``, ``y_true`` and ``x`` columns added.
    """
    # Encoder rows: the trailing lookback window of the training data.
    latest_hour = train_dataset['hours_from_start'].max()
    encoder_data = train_dataset[train_dataset['hours_from_start'] > latest_hour - max_encoder_length]
    decoder_data = test_dataset.copy()

    new_prediction_data = pd.concat([encoder_data, decoder_data], ignore_index=True)
    consumer_rows = new_prediction_data.loc[new_prediction_data['consumer_id'] == consumer_id]
    predictions = model.predict(
        consumer_rows,
        return_y=True,
        return_x=True,
        trainer_kwargs=dict(accelerator="cpu"),
    )

    # Copy so the new columns do not touch the caller's frame and pandas does
    # not raise SettingWithCopyWarning.
    testing_results = test_dataset.loc[test_dataset['consumer_id'] == consumer_id].copy()

    # Only the first series of the prediction batch is used.
    y = [float(i) for i in predictions.output[0]]
    y_true = [float(i) for i in predictions.y[0][0]]
    x = [int(i) for i in predictions[1]['decoder_time_idx'][0]]

    testing_results['prediction'] = y
    testing_results['y_true'] = y_true
    testing_results['x'] = x

    # Metrics are computed against the frame's own ``Lead_1`` column.
    rmse = np.around(np.sqrt(mean_squared_error(testing_results['Lead_1'], y)), 2)
    mae = np.around(mean_absolute_error(testing_results['Lead_1'], y), 2)
    print(f"TEST DATA  = Consumer ID : {consumer_id} :: MAE : {mae} :: RMSE : {rmse}")
    return testing_results