Spaces:
Paused
Paused
# Create new features
def create_new_features(df):
    """Derive the engineered feature columns from a listings frame.

    The returned frame has the ``date`` column removed and three columns
    added:

    * ``house_age``: year of ``date`` minus ``yr_built``.
    * ``years_since_renovation``: year of ``date`` minus ``yr_renovated``.
    * ``has_basement``: 1 where ``sqft_basement`` is greater than 0, else 0.

    The input frame is left untouched; a new frame is returned.

    Args:
        df: DataFrame with a datetime-like ``date`` column (it is read
            through the ``.dt`` accessor) and ``yr_built``,
            ``yr_renovated`` and ``sqft_basement`` columns.

    Returns:
        A new DataFrame holding the remaining original columns plus the
        three derived ones.

    Raises:
        KeyError: If one of the required columns is missing.
        AttributeError: If ``date`` is not datetime-like.
    """
    # Keep the sale year in a local Series rather than as a temporary column,
    # so the caller's frame is never modified.
    year_sold = df['date'].dt.year

    # 'year_sold' is only a scratch name here; any pre-existing column with
    # that name is dropped as well so the output columns do not depend on it.
    result = df.drop(columns=['date', 'year_sold'], errors='ignore')

    result['house_age'] = year_sold - result['yr_built']
    # NOTE(review): rows with yr_renovated == 0 yield the sale year itself;
    # presumably 0 marks "never renovated" - confirm before changing this.
    result['years_since_renovation'] = year_sold - result['yr_renovated']
    result['has_basement'] = (result['sqft_basement'] > 0).astype('int64')
    return result
# (low, high) bounds used to min-max scale each numerical feature.
# NOTE(review): these look like the minima/maxima of the training data -
# confirm against the training pipeline before editing any value.
_FEATURE_BOUNDS = {
    'bedrooms': (0, 9),
    'bathrooms': (0, 8),
    'sqft_living': (370, 13540),
    'sqft_lot': (638, 1074218),
    'floors': (1, 3),
    'waterfront': (0, 1),
    'view': (0, 4),
    'condition': (1, 5),
    'sqft_above': (370, 9410),
    'sqft_basement': (0, 4820),
    'yr_built': (1900, 2014),
    'yr_renovated': (0, 2014),
    'house_age': (0, 114),
    'years_since_renovation': (0, 2014),
}


def normalize(df):
    """Min-max scale the numerical feature columns of ``df`` in place.

    Every column named in ``_FEATURE_BOUNDS`` is replaced by
    ``(value - low) / (high - low)`` using that column's fixed bounds.
    Values are not clipped, so inputs outside the bounds map outside
    ``[0, 1]``. Columns not listed in the table are left as they are.

    Args:
        df: DataFrame containing every column named in ``_FEATURE_BOUNDS``.

    Returns:
        The same ``df`` object, with the listed columns overwritten.

    Raises:
        KeyError: If a listed column is missing. Columns processed before
            the missing one have already been scaled at that point.
    """
    for col, (low, high) in _FEATURE_BOUNDS.items():
        # Whole-column arithmetic instead of a per-element Python lambda.
        df[col] = (df[col] - low) / (high - low)
    return df
def init_new_pred():
    """Build a blank prediction record.

    Returns:
        A new dict in which every feature key (house attributes followed by
        the ``city_*`` indicator keys) maps to 0, plus a ``date`` key
        holding the pandas timestamp for 2014-07-10.
    """
    import pandas as pd

    house_fields = (
        'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
        'waterfront', 'view', 'condition', 'sqft_above', 'sqft_basement',
        'yr_built', 'yr_renovated', 'house_age', 'years_since_renovation',
        'has_basement',
    )
    city_flags = (
        'city_Algona', 'city_Auburn', 'city_Beaux Arts Village',
        'city_Bellevue', 'city_Black Diamond', 'city_Bothell', 'city_Burien',
        'city_Carnation', 'city_Clyde Hill', 'city_Covington',
        'city_Des Moines', 'city_Duvall', 'city_Enumclaw', 'city_Fall City',
        'city_Federal Way', 'city_Inglewood-Finn Hill', 'city_Issaquah',
        'city_Kenmore', 'city_Kent', 'city_Kirkland', 'city_Lake Forest Park',
        'city_Maple Valley', 'city_Medina', 'city_Mercer Island', 'city_Milton',
        'city_Newcastle', 'city_Normandy Park', 'city_North Bend',
        'city_Pacific', 'city_Preston', 'city_Ravensdale', 'city_Redmond',
        'city_Renton', 'city_Sammamish', 'city_SeaTac', 'city_Seattle',
        'city_Shoreline', 'city_Skykomish', 'city_Snoqualmie',
        'city_Snoqualmie Pass', 'city_Tukwila', 'city_Vashon',
        'city_Woodinville', 'city_Yarrow Point',
    )

    # Insertion order (house fields, then city flags, then date) is preserved.
    new_pred = dict.fromkeys(house_fields + city_flags, 0)
    new_pred['date'] = pd.to_datetime('2014-07-10')  # do not change
    return new_pred