Spaces:
Paused
Paused
Update utils.py
Browse files
utils.py
CHANGED
@@ -6,38 +6,13 @@ def create_new_features(df):
|
|
6 |
df['years_since_renovation'] = df['year_sold'] - df['yr_renovated']
|
7 |
df.drop(columns=['year_sold'], inplace=True)
|
8 |
df['has_basement'] = df['sqft_basement'].apply(lambda x: 1 if x > 0 else 0)
|
|
|
|
|
9 |
return df
|
10 |
|
11 |
def normalize(df):
|
12 |
-
|
13 |
-
|
14 |
-
# 'sqft_living': 370,
|
15 |
-
# 'sqft_lot': 638,
|
16 |
-
# 'floors': 1,
|
17 |
-
# 'waterfront': 0,
|
18 |
-
# 'view': 0,
|
19 |
-
# 'condition': 1,
|
20 |
-
# 'sqft_above': 370,
|
21 |
-
# 'sqft_basement': 0,
|
22 |
-
# 'yr_built': 1900,
|
23 |
-
# 'yr_renovated': 0,
|
24 |
-
# 'house_age': 0,
|
25 |
-
# 'years_since_renovation': 0}
|
26 |
-
# max_dict = {'bedrooms': 9,
|
27 |
-
# 'bathrooms': 8,
|
28 |
-
# 'sqft_living': 13540,
|
29 |
-
# 'sqft_lot': 1074218,
|
30 |
-
# 'floors': 3,
|
31 |
-
# 'waterfront': 1,
|
32 |
-
# 'view': 4,
|
33 |
-
# 'condition': 5,
|
34 |
-
# 'sqft_above': 9410,
|
35 |
-
# 'sqft_basement': 4820,
|
36 |
-
# 'yr_built': 2014,
|
37 |
-
# 'yr_renovated': 2014,
|
38 |
-
# 'house_age': 114,
|
39 |
-
# 'years_since_renovation': 2014}
|
40 |
-
|
41 |
with open("./min_dict.json", "r") as f:
|
42 |
min_dict = json.load(f)
|
43 |
|
@@ -52,8 +27,18 @@ def normalize(df):
|
|
52 |
df[col] = df[col].apply(lambda x: (x-min_dict[col])/(max_dict[col]-min_dict[col]))
|
53 |
return df
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
def init_new_pred():
|
56 |
import pandas as pd
|
|
|
57 |
columns = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
|
58 |
'waterfront', 'view', 'condition', 'sqft_above', 'sqft_basement',
|
59 |
'yr_built', 'yr_renovated', 'house_age', 'years_since_renovation',
|
|
|
6 |
df['years_since_renovation'] = df['year_sold'] - df['yr_renovated']
|
7 |
df.drop(columns=['year_sold'], inplace=True)
|
8 |
df['has_basement'] = df['sqft_basement'].apply(lambda x: 1 if x > 0 else 0)
|
9 |
+
mask = df['yr_renovated'] == 0
|
10 |
+
df.loc[mask, 'yr_renovated'] = df.loc[mask, 'yr_built']
|
11 |
return df
|
12 |
|
13 |
def normalize(df):
|
14 |
+
import json
|
15 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
16 |
with open("./min_dict.json", "r") as f:
|
17 |
min_dict = json.load(f)
|
18 |
|
|
|
27 |
df[col] = df[col].apply(lambda x: (x-min_dict[col])/(max_dict[col]-min_dict[col]))
|
28 |
return df
|
29 |
|
30 |
+
def bucketize(df):
    """Round the square-footage columns down to fixed-width buckets.

    Each value in ``sqft_living``, ``sqft_lot``, ``sqft_above`` and
    ``sqft_basement`` is replaced by the largest multiple of its bucket
    size (25) that is less than or equal to the value, e.g. 1234 -> 1225.

    Args:
        df: DataFrame holding the four square-footage columns listed
            above. It is modified in place; other columns are untouched.

    Returns:
        The same ``df`` object, with the bucketed columns overwritten.
    """
    bucket_sizes = {
        'sqft_living': 25,
        'sqft_lot': 25,
        'sqft_above': 25,
        'sqft_basement': 25,
    }
    for col, size in bucket_sizes.items():
        # Vectorised floor-division: same result as applying
        # ``(x // size) * size`` to every element, but without calling a
        # Python lambda once per row.
        df[col] = (df[col] // size) * size
    return df
|
38 |
+
|
39 |
def init_new_pred():
|
40 |
import pandas as pd
|
41 |
+
|
42 |
columns = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
|
43 |
'waterfront', 'view', 'condition', 'sqft_above', 'sqft_basement',
|
44 |
'yr_built', 'yr_renovated', 'house_age', 'years_since_renovation',
|