RMHalak committed on
Commit
344f42c
1 Parent(s): cd9721e

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +14 -29
utils.py CHANGED
@@ -6,38 +6,13 @@ def create_new_features(df):
6
  df['years_since_renovation'] = df['year_sold'] - df['yr_renovated']
7
  df.drop(columns=['year_sold'], inplace=True)
8
  df['has_basement'] = df['sqft_basement'].apply(lambda x: 1 if x > 0 else 0)
 
 
9
  return df
10
 
11
  def normalize(df):
12
- # min_dict = {'bedrooms': 0,
13
- # 'bathrooms': 0,
14
- # 'sqft_living': 370,
15
- # 'sqft_lot': 638,
16
- # 'floors': 1,
17
- # 'waterfront': 0,
18
- # 'view': 0,
19
- # 'condition': 1,
20
- # 'sqft_above': 370,
21
- # 'sqft_basement': 0,
22
- # 'yr_built': 1900,
23
- # 'yr_renovated': 0,
24
- # 'house_age': 0,
25
- # 'years_since_renovation': 0}
26
- # max_dict = {'bedrooms': 9,
27
- # 'bathrooms': 8,
28
- # 'sqft_living': 13540,
29
- # 'sqft_lot': 1074218,
30
- # 'floors': 3,
31
- # 'waterfront': 1,
32
- # 'view': 4,
33
- # 'condition': 5,
34
- # 'sqft_above': 9410,
35
- # 'sqft_basement': 4820,
36
- # 'yr_built': 2014,
37
- # 'yr_renovated': 2014,
38
- # 'house_age': 114,
39
- # 'years_since_renovation': 2014}
40
-
41
  with open("./min_dict.json", "r") as f:
42
  min_dict = json.load(f)
43
 
@@ -52,8 +27,18 @@ def normalize(df):
52
  df[col] = df[col].apply(lambda x: (x-min_dict[col])/(max_dict[col]-min_dict[col]))
53
  return df
54
 
 
 
 
 
 
 
 
 
 
55
  def init_new_pred():
56
  import pandas as pd
 
57
  columns = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
58
  'waterfront', 'view', 'condition', 'sqft_above', 'sqft_basement',
59
  'yr_built', 'yr_renovated', 'house_age', 'years_since_renovation',
 
6
  df['years_since_renovation'] = df['year_sold'] - df['yr_renovated']
7
  df.drop(columns=['year_sold'], inplace=True)
8
  df['has_basement'] = df['sqft_basement'].apply(lambda x: 1 if x > 0 else 0)
9
+ mask = df['yr_renovated'] == 0
10
+ df.loc[mask, 'yr_renovated'] = df.loc[mask, 'yr_built']
11
  return df
12
 
13
  def normalize(df):
14
+ import json
15
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  with open("./min_dict.json", "r") as f:
17
  min_dict = json.load(f)
18
 
 
27
  df[col] = df[col].apply(lambda x: (x-min_dict[col])/(max_dict[col]-min_dict[col]))
28
  return df
29
 
30
+ def bucketize(df):
31
+ bucket_sizes = {'sqft_living': 25,
32
+ 'sqft_lot': 25,
33
+ 'sqft_above': 25,
34
+ 'sqft_basement': 25}
35
+ for col, size in bucket_sizes.items():
36
+ df[col] = df[col].apply(lambda x: (x // size)*size)
37
+ return df
38
+
39
  def init_new_pred():
40
  import pandas as pd
41
+
42
  columns = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
43
  'waterfront', 'view', 'condition', 'sqft_above', 'sqft_basement',
44
  'yr_built', 'yr_renovated', 'house_age', 'years_since_renovation',