File size: 1,267 Bytes
dad00c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# Build lag / rolling / differencing features from the first target column
# listed in FEATURES["y"], append them to data_preprocess, and record their
# names in FEATURES["y_features"].
FEATURES["y_features"] = []
col = FEATURES["y"][0]
new_features = data_preprocess[col].to_frame().copy()

# Lag features: the target shifted back by N rows
# (the "D" suffix presumes one row per day -- TODO confirm against the index).
for lag in (1, 6, 7, 8, 14):
    new_features[f"{col}_L{lag}D"] = new_features[col].shift(lag)

# Every rolling/differencing feature is computed on the 1-row-lagged target so
# that the value at time t only uses observations up to t-1.
lagged_target = new_features[col].shift(1)

# Rolling features
# window='14D' is a time-based window, so the index must be datetime-like.
rolling_mean = lagged_target.rolling(window="14D").mean()
# The last 6 rows need the prediction from time t-1, which is not available
# yet; blank them out for now.
# The mask is applied to the standalone Series via .iloc instead of the
# chained `frame[name][-6:] = ...` form, which is a silent no-op under pandas
# copy-on-write and triggers SettingWithCopyWarning otherwise.
rolling_mean.iloc[-6:] = np.nan
new_features[col + "_RMean14D"] = rolling_mean

# Differencing features: change of the lagged target over 7 and 14 rows.
for span in (7, 14):
    new_features[f"{col}_Diff{span}D"] = lagged_target - lagged_target.shift(span)

# Keep only the derived columns; the raw target already lives in data_preprocess.
new_features = new_features.drop(columns=col)
FEATURES["y_features"].extend(new_features.columns.tolist())
data_preprocess = pd.concat([data_preprocess, new_features], axis=1)

# Sanity check: exactly one set of derived columns per target column is present
# (this also fails if the cell is re-run and duplicates the columns).
assert len(data_preprocess.loc[:, FEATURES["y_features"]].columns) == len(FEATURES["y"]) * len(new_features.columns)