File size: 825 Bytes
80df7fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
# create new features
def create_new_features(df):
    """Derive age and basement features from the raw sale columns.

    Reads the ``date``, ``yr_built``, ``yr_renovated`` and ``sqft_basement``
    columns. The input frame is not modified; all new columns are written to
    the returned frame only.

    Args:
        df: DataFrame containing the four columns above. ``date`` must be a
            datetime-like column, since it is accessed through ``.dt``.

    Returns:
        A new DataFrame without the ``date`` column and with three columns
        appended, in this order:
        ``house_age`` (sale year minus ``yr_built``),
        ``years_since_renovation`` (sale year minus ``yr_renovated``) and
        ``has_basement`` (1 where ``sqft_basement`` > 0, otherwise 0).

    Raises:
        KeyError: If one of the required columns is missing.
        AttributeError: If ``date`` is not datetime-like.
    """
    # Hold the sale year in a local Series instead of a temporary column so
    # the caller's frame is never touched and nothing has to be dropped later.
    year_sold = df['date'].dt.year
    features = df.drop(columns=['date'])

    features['house_age'] = year_sold - features['yr_built']
    # NOTE(review): if ``yr_renovated`` uses 0 to mean "never renovated", this
    # evaluates to the sale year itself for those rows -- confirm against the
    # dataset before relying on this feature.
    features['years_since_renovation'] = year_sold - features['yr_renovated']

    # Vectorized flag; missing values compare False and therefore map to 0.
    features['has_basement'] = (features['sqft_basement'] > 0).astype('int64')
    return features

def normalize(df, col, min_dict, max_dict):
    """Min-max scale one column of ``df`` in place and return that column.

    Each value ``x`` becomes ``(x - min_dict[col]) / (max_dict[col] -
    min_dict[col])``, so the stored minimum maps to 0 and the stored maximum
    maps to 1.

    Args:
        df: DataFrame holding ``col``; the column is overwritten with the
            scaled values.
        col: Name of the column to scale; also the key used to look up the
            bounds in ``min_dict`` and ``max_dict``.
        min_dict: Mapping from column name to the minimum used for scaling.
        max_dict: Mapping from column name to the maximum used for scaling.

    Returns:
        The scaled column, i.e. ``df[col]`` after the update.

    Raises:
        KeyError: If ``col`` is missing from ``df``, ``min_dict`` or
            ``max_dict``.
    """
    lower = min_dict[col]
    span = max_dict[col] - lower
    if span == 0:
        # Constant column: dividing by the range would be a division by zero.
        # Use a unit range so values equal to the minimum scale to 0.0.
        span = 1

    # Column-wise arithmetic instead of a per-element lambda.
    df[col] = (df[col] - lower) / span
    return df[col]