# Feature engineering helpers: derive new features, min-max scale the
# numerical ones, and build an empty prediction record.

# (min, max) bounds used for min-max scaling of each numerical feature.
# NOTE(review): presumably these are the extremes observed in the training
# data -- confirm against the training pipeline before changing any value.
_FEATURE_BOUNDS = {
    'bedrooms': (0, 9),
    'bathrooms': (0, 8),
    'sqft_living': (370, 13540),
    'sqft_lot': (638, 1074218),
    'floors': (1, 3),
    'waterfront': (0, 1),
    'view': (0, 4),
    'condition': (1, 5),
    'sqft_above': (370, 9410),
    'sqft_basement': (0, 4820),
    'yr_built': (1900, 2014),
    'yr_renovated': (0, 2014),
    'house_age': (0, 114),
    'years_since_renovation': (0, 2014),
}

# Keys of a prediction record, in order (the 'date' key is appended last by
# init_new_pred).
_PREDICTION_COLUMNS = (
    'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
    'waterfront', 'view', 'condition', 'sqft_above', 'sqft_basement',
    'yr_built', 'yr_renovated', 'house_age', 'years_since_renovation',
    'has_basement',
    'city_Algona', 'city_Auburn', 'city_Beaux Arts Village', 'city_Bellevue',
    'city_Black Diamond', 'city_Bothell', 'city_Burien', 'city_Carnation',
    'city_Clyde Hill', 'city_Covington', 'city_Des Moines', 'city_Duvall',
    'city_Enumclaw', 'city_Fall City', 'city_Federal Way',
    'city_Inglewood-Finn Hill', 'city_Issaquah', 'city_Kenmore', 'city_Kent',
    'city_Kirkland', 'city_Lake Forest Park', 'city_Maple Valley',
    'city_Medina', 'city_Mercer Island', 'city_Milton',
    'city_Newcastle', 'city_Normandy Park', 'city_North Bend', 'city_Pacific',
    'city_Preston', 'city_Ravensdale', 'city_Redmond', 'city_Renton',
    'city_Sammamish', 'city_SeaTac', 'city_Seattle', 'city_Shoreline',
    'city_Skykomish', 'city_Snoqualmie', 'city_Snoqualmie Pass',
    'city_Tukwila', 'city_Vashon', 'city_Woodinville', 'city_Yarrow Point',
)


def create_new_features(df):
    """Return a copy of *df* with derived features added and 'date' removed.

    Added columns:
      * 'house_age': sale year minus 'yr_built'.
      * 'years_since_renovation': sale year minus 'yr_renovated'.
      * 'has_basement': 1 where 'sqft_basement' > 0, otherwise 0.

    Args:
        df: DataFrame with a datetime-typed 'date' column and the columns
            'yr_built', 'yr_renovated' and 'sqft_basement'.

    Returns:
        A new DataFrame; the frame passed in is left untouched.
    """
    # Keep the sale year in a local Series instead of writing a temporary
    # column into the caller's frame.
    year_sold = df['date'].dt.year
    out = df.drop(columns=['date'])

    out['house_age'] = year_sold - out['yr_built']
    # When 'yr_renovated' is 0 this evaluates to the sale year itself.
    # NOTE(review): the scaling upper bound of 2014 for this feature suggests
    # downstream code relies on that -- confirm before changing it.
    out['years_since_renovation'] = year_sold - out['yr_renovated']
    out['has_basement'] = (out['sqft_basement'] > 0).astype('int64')
    return out


def normalize(df):
    """Min-max scale the numerical feature columns of *df* in place.

    Each column named in ``_FEATURE_BOUNDS`` is replaced by
    ``(value - min) / (max - min)`` using the fixed bounds from that table.
    Values outside the bounds are not clipped, so they map outside [0, 1].

    Args:
        df: DataFrame containing every column listed in ``_FEATURE_BOUNDS``.

    Returns:
        The same DataFrame object, after modification.
    """
    for col, (lower, upper) in _FEATURE_BOUNDS.items():
        df[col] = (df[col] - lower) / (upper - lower)
    return df


def init_new_pred():
    """Build a blank prediction record.

    Returns:
        A dict mapping every name in ``_PREDICTION_COLUMNS`` to 0, plus a
        'date' entry holding a fixed pandas Timestamp.
    """
    import pandas as pd

    new_pred = dict.fromkeys(_PREDICTION_COLUMNS, 0)
    new_pred['date'] = pd.to_datetime('2014-07-10')  # do not change
    return new_pred