Spaces:
Paused
Paused
Update utils.py
Browse files
utils.py
CHANGED
@@ -8,10 +8,63 @@ def create_new_features(df):
|
|
8 |
df['has_basement'] = df['sqft_basement'].apply(lambda x: 1 if x > 0 else 0)
|
9 |
return df
|
10 |
|
11 |
-
def normalize(df
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
numerical_features = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'waterfront',
|
13 |
'view', 'condition', 'sqft_above', 'sqft_basement',
|
14 |
'yr_built', 'yr_renovated', 'house_age', 'years_since_renovation']
|
15 |
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
df['has_basement'] = df['sqft_basement'].apply(lambda x: 1 if x > 0 else 0)
|
9 |
return df
|
10 |
|
11 |
+
def normalize(df):
    """Min-max scale the numerical feature columns of ``df`` in place.

    Every column named in ``numerical_features`` is rewritten as
    ``(value - min) / (max - min)`` using the fixed per-column bounds
    hard-coded below, so a value equal to the column's lower bound maps to
    0 and a value equal to its upper bound maps to 1. Values outside the
    bounds are not clipped and therefore fall outside ``[0, 1]``.

    Args:
        df: pandas DataFrame holding every column listed in
            ``numerical_features``. Any other columns are left untouched.

    Returns:
        The same ``df`` object that was passed in (it is mutated, not copied).

    Raises:
        KeyError: If one of the numerical feature columns is missing.
    """
    # NOTE(review): these bounds look like the min/max observed in the
    # training data -- confirm before changing any of them, since a fitted
    # model presumably expects exactly this scaling.
    min_dict = {
        'bedrooms': 0,
        'bathrooms': 0,
        'sqft_living': 370,
        'sqft_lot': 638,
        'floors': 1,
        'waterfront': 0,
        'view': 0,
        'condition': 1,
        'sqft_above': 370,
        'sqft_basement': 0,
        'yr_built': 1900,
        'yr_renovated': 0,
        'house_age': 0,
        'years_since_renovation': 0,
    }

    max_dict = {
        'bedrooms': 9,
        'bathrooms': 8,
        'sqft_living': 13540,
        'sqft_lot': 1074218,
        'floors': 3,
        'waterfront': 1,
        'view': 4,
        'condition': 5,
        'sqft_above': 9410,
        'sqft_basement': 4820,
        'yr_built': 2014,
        'yr_renovated': 2014,
        'house_age': 114,
        'years_since_renovation': 2014,
    }

    numerical_features = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'waterfront',
                          'view', 'condition', 'sqft_above', 'sqft_basement',
                          'yr_built', 'yr_renovated', 'house_age', 'years_since_renovation']

    for col in numerical_features:
        lower = min_dict[col]
        span = max_dict[col] - lower
        # Vectorized column arithmetic: same result as a per-element
        # ``apply`` but without a Python-level call per row and without a
        # lambda closing over the loop variable.
        df[col] = (df[col] - lower) / span
    return df
|
49 |
+
|
50 |
+
def init_new_pred():
    """Build a blank feature record for a single new prediction.

    Returns:
        dict: One entry per model input column listed in ``columns`` (the
        numerical features, ``has_basement`` and the ``city_*`` indicator
        columns), each initialised to 0, plus a ``'date'`` entry fixed to the
        pandas Timestamp for 2014-07-10. Keys keep the order of ``columns``
        with ``'date'`` last.
    """
    columns = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
               'waterfront', 'view', 'condition', 'sqft_above', 'sqft_basement',
               'yr_built', 'yr_renovated', 'house_age', 'years_since_renovation',
               'has_basement', 'city_Algona', 'city_Auburn', 'city_Beaux Arts Village',
               'city_Bellevue', 'city_Black Diamond', 'city_Bothell', 'city_Burien',
               'city_Carnation', 'city_Clyde Hill', 'city_Covington',
               'city_Des Moines', 'city_Duvall', 'city_Enumclaw', 'city_Fall City',
               'city_Federal Way', 'city_Inglewood-Finn Hill', 'city_Issaquah',
               'city_Kenmore', 'city_Kent', 'city_Kirkland', 'city_Lake Forest Park',
               'city_Maple Valley', 'city_Medina', 'city_Mercer Island', 'city_Milton',
               'city_Newcastle', 'city_Normandy Park', 'city_North Bend',
               'city_Pacific', 'city_Preston', 'city_Ravensdale', 'city_Redmond',
               'city_Renton', 'city_Sammamish', 'city_SeaTac', 'city_Seattle',
               'city_Shoreline', 'city_Skykomish', 'city_Snoqualmie',
               'city_Snoqualmie Pass', 'city_Tukwila', 'city_Vashon',
               'city_Woodinville', 'city_Yarrow Point']

    # Every feature starts at 0; the caller is expected to overwrite the
    # fields it knows. The value is an immutable int, so sharing it across
    # keys via fromkeys is safe.
    new_pred = dict.fromkeys(columns, 0)
    new_pred['date'] = pd.to_datetime('2014-07-10')  # do not change
    return new_pred
|