Nathanotal committed on
Commit
8d6856a
·
1 Parent(s): d47d9c3

fix categorical

Browse files
Files changed (1) hide show
  1. app.py +17 -3
app.py CHANGED
@@ -84,13 +84,26 @@ def normalizeData(df):
84
 
85
  return df
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
  model = downloadModel()
89
 
90
  def sthlm(streetName, area, number, sqm, rooms, monthlyFee, monthlyCost, floor, yearBuilt):
91
  soldDate = '2021-01-01' # TODO
92
  price = None
93
- agency = None
94
  brf = 'BRF Kartboken 1' # TODO: remove
95
  lat, lon = getAddressInfo(streetName, number)
96
  gdp, unemployment, interestRate = getFinancialInfo(soldDate)
@@ -98,9 +111,10 @@ def sthlm(streetName, area, number, sqm, rooms, monthlyFee, monthlyCost, floor,
98
  # Parse the input so we can run it through the model
99
  # Create a dataframe from the input values
100
  input_variables = pd.DataFrame(
101
- [[area,streetName,number,sqm,rooms,soldDate,monthlyFee,monthlyCost,floor,yearBuilt, brf,agency,lat,lon,gdp,unemployment,interestRate]], columns=columnHeaders)
102
 
103
  df = normalizeData(input_variables)
 
104
 
105
  # Save first row as a numpy array
106
  input_list = df.iloc[0].to_numpy()
@@ -118,7 +132,7 @@ numericalInputs = ['number', 'sqm','rooms', 'monthlyFee','monthlyCost','floor','
118
  categoricalInputs = ['area']
119
  inputs = [gr.inputs.Textbox(lines=1, label='streetName')]
120
  catToInput = {
121
- 'feature': ['a', 'b', 'c']
122
  }
123
 
124
  # Generate the input form
 
84
 
85
  return df
86
 
87
+ def xgbFix(df):
88
+ # XGBoost fix: instead of one-hot encoding, cast the categorical columns to the pandas "category" dtype and the remaining columns to float
89
+ features_to_categorical = ["area", "streetName", "brf", "agency"]
90
+
91
+
92
+ features_to_float = ["number", "sqm", "rooms", "price", "monthlyFee",
93
+ "monthlyCost", "floor", "yearBuilt", "gdp", "unemployment",
94
+ "interestRate", "lat", "lon", "soldDate"]
95
+
96
+
97
+ df[features_to_categorical] = df[features_to_categorical].astype("category")
98
+ df[features_to_float] = df[features_to_float].astype(float)
99
+ return df
100
 
101
  model = downloadModel()
102
 
103
  def sthlm(streetName, area, number, sqm, rooms, monthlyFee, monthlyCost, floor, yearBuilt):
104
  soldDate = '2021-01-01' # TODO
105
  price = None
106
+ agency = 'Notar'
107
  brf = 'BRF Kartboken 1' # TODO: remove
108
  lat, lon = getAddressInfo(streetName, number)
109
  gdp, unemployment, interestRate = getFinancialInfo(soldDate)
 
111
  # Parse the input so we can run it through the model
112
  # Create a dataframe from the input values
113
  input_variables = pd.DataFrame(
114
+ [[area,streetName,number,sqm,rooms,soldDate,monthlyFee,monthlyCost,floor,yearBuilt,brf,agency,lat,lon,gdp,unemployment,interestRate]], columns=columnHeaders)
115
 
116
  df = normalizeData(input_variables)
117
+ df = xgbFix(df)
118
 
119
  # Save first row as a numpy array
120
  input_list = df.iloc[0].to_numpy()
 
132
  categoricalInputs = ['area']
133
  inputs = [gr.inputs.Textbox(lines=1, label='streetName')]
134
  catToInput = {
135
+ 'feature': ['Bromma', 'Abrahamsberg', 'Akalla']
136
  }
137
 
138
  # Generate the input form