RMHalak committed on
Commit
b3fa682
·
verified ·
1 Parent(s): 5fa9478

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +56 -3
utils.py CHANGED
@@ -8,10 +8,63 @@ def create_new_features(df):
8
  df['has_basement'] = df['sqft_basement'].apply(lambda x: 1 if x > 0 else 0)
9
  return df
10
 
11
- def normalize(df, col, min_dict, max_dict):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  numerical_features = ['bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors', 'waterfront',
13
  'view', 'condition', 'sqft_above', 'sqft_basement',
14
  'yr_built', 'yr_renovated', 'house_age', 'years_since_renovation']
15
 
16
- df[col] = df[col].apply(lambda x: (x-min_dict[col])/(max_dict[col]-min_dict[col]))
17
- return df[col]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  df['has_basement'] = df['sqft_basement'].apply(lambda x: 1 if x > 0 else 0)
9
  return df
10
 
11
def normalize(df):
    """Min-max scale the numerical feature columns of ``df`` in place.

    Every column listed in the bounds table below is rescaled with
    ``(value - low) / (high - low)`` using fixed, hard-coded bounds, so a
    value equal to the stored minimum becomes 0 and a value equal to the
    stored maximum becomes 1. Values outside the stored range are not
    clipped. Columns that are not in the table are left untouched.

    Args:
        df: pandas DataFrame holding every column named in the bounds
            table. It is modified in place.

    Returns:
        The same ``df`` object that was passed in, with the listed columns
        replaced by their scaled values.

    Raises:
        KeyError: if ``df`` is missing one of the listed columns. Columns
            processed before the missing one have already been rescaled.
    """
    # Single source of truth: column -> (min, max). Keeping both bounds
    # together means a column can never be given one bound without the other.
    # NOTE(review): the bounds look like statistics of the training set
    # (e.g. years_since_renovation tops out at 2014) -- confirm before editing.
    bounds = {
        'bedrooms': (0, 9),
        'bathrooms': (0, 8),
        'sqft_living': (370, 13540),
        'sqft_lot': (638, 1074218),
        'floors': (1, 3),
        'waterfront': (0, 1),
        'view': (0, 4),
        'condition': (1, 5),
        'sqft_above': (370, 9410),
        'sqft_basement': (0, 4820),
        'yr_built': (1900, 2014),
        'yr_renovated': (0, 2014),
        'house_age': (0, 114),
        'years_since_renovation': (0, 2014),
    }

    for col, (low, high) in bounds.items():
        # Whole-column arithmetic instead of a per-row Python lambda.
        df[col] = (df[col] - low) / (high - low)
    return df
49
+
50
def init_new_pred():
    """Return a fresh, zero-filled record for a single new prediction.

    The result is a plain dict with one key per feature column (numeric
    features, ``has_basement`` and the ``city_*`` indicator columns), each
    set to 0, followed by a ``'date'`` key holding a fixed pandas
    Timestamp. A new dict is built on every call.
    """
    # NOTE(review): key order presumably has to match the column order the
    # model was trained with -- confirm before reordering anything.
    house_fields = [
        'bedrooms', 'bathrooms', 'sqft_living', 'sqft_lot', 'floors',
        'waterfront', 'view', 'condition', 'sqft_above', 'sqft_basement',
        'yr_built', 'yr_renovated', 'house_age', 'years_since_renovation',
        'has_basement',
    ]
    city_fields = [
        'city_Algona', 'city_Auburn', 'city_Beaux Arts Village',
        'city_Bellevue', 'city_Black Diamond', 'city_Bothell', 'city_Burien',
        'city_Carnation', 'city_Clyde Hill', 'city_Covington',
        'city_Des Moines', 'city_Duvall', 'city_Enumclaw', 'city_Fall City',
        'city_Federal Way', 'city_Inglewood-Finn Hill', 'city_Issaquah',
        'city_Kenmore', 'city_Kent', 'city_Kirkland', 'city_Lake Forest Park',
        'city_Maple Valley', 'city_Medina', 'city_Mercer Island', 'city_Milton',
        'city_Newcastle', 'city_Normandy Park', 'city_North Bend',
        'city_Pacific', 'city_Preston', 'city_Ravensdale', 'city_Redmond',
        'city_Renton', 'city_Sammamish', 'city_SeaTac', 'city_Seattle',
        'city_Shoreline', 'city_Skykomish', 'city_Snoqualmie',
        'city_Snoqualmie Pass', 'city_Tukwila', 'city_Vashon',
        'city_Woodinville', 'city_Yarrow Point',
    ]

    record = dict.fromkeys(house_fields + city_fields, 0)
    record['date'] = pd.to_datetime('2014-07-10')  # do not change
    return record