matsammut committed on
Commit
fbfa266
·
verified ·
1 Parent(s): 73f0a1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -34
app.py CHANGED
@@ -13,7 +13,7 @@ import hdbscan
13
 
14
 
15
  # # Define the prediction function
16
- def predict_ann(age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
17
  # columns = {
18
  # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
19
  # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
@@ -23,7 +23,7 @@ def predict_ann(age, workclass, education, marital_status, occupation, relations
23
  "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
24
  "hours-per-week":[hours_per_week], "native-country":[native_country]}
25
  df = pd.DataFrame(data=columns)
26
- fixed_features = cleaning_features(df,race)
27
  print(fixed_features)
28
  # with open('ann_model.pkl', 'rb') as ann_model_file:
29
  # ann_model = pickle.load(ann_model_file)
@@ -33,7 +33,7 @@ def predict_ann(age, workclass, education, marital_status, occupation, relations
33
  # prediction = 1
34
  return "Income >50K" if prediction == 1 else "Income <=50K"
35
 
36
- def predict_rf(age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
37
  # columns = {
38
  # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
39
  # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
@@ -43,7 +43,7 @@ def predict_rf(age, workclass, education, marital_status, occupation, relationsh
43
  "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
44
  "hours-per-week":[hours_per_week], "native-country":[native_country]}
45
  df = pd.DataFrame(data=columns)
46
- fixed_features = cleaning_features(df,race)
47
  print(fixed_features)
48
  # with open('ann_model.pkl', 'rb') as ann_model_file:
49
  # ann_model = pickle.load(ann_model_file)
@@ -53,7 +53,7 @@ def predict_rf(age, workclass, education, marital_status, occupation, relationsh
53
  # prediction = 1
54
  return "Income >50K" if prediction == 1 else "Income <=50K"
55
 
56
- def predict_hb(age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
57
  # columns = {
58
  # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
59
  # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
@@ -63,7 +63,7 @@ def predict_hb(age, workclass, education, marital_status, occupation, relationsh
63
  "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
64
  "hours-per-week":[hours_per_week], "native-country":[native_country]}
65
  df = pd.DataFrame(data=columns)
66
- fixed_features = cleaning_features(df,race)
67
  print(fixed_features)
68
  # with open('ann_model.pkl', 'rb') as ann_model_file:
69
  # ann_model = pickle.load(ann_model_file)
@@ -75,7 +75,7 @@ def predict_hb(age, workclass, education, marital_status, occupation, relationsh
75
  return f"Predicted Cluster (HDBSCAN): {prediction}"
76
 
77
 
78
- def cleaning_features(data,race):
79
  # with open('race_onehot_encoder.pkl', 'rb') as enc_file:
80
  # encoder = pickle.load(enc_file)
81
 
@@ -135,6 +135,12 @@ def cleaning_features(data,race):
135
  data = data.drop(columns=['race'])
136
 
137
  data = pca(data)
 
 
 
 
 
 
138
  return data
139
 
140
  # def pca(data):
@@ -192,11 +198,6 @@ ann_inputs = [
192
  "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
193
  label="Education"
194
  ),
195
- gr.Dropdown(
196
- ["Married-civ-spouse", "Divorced", "Never-married", "Separated",
197
- "Widowed", "Married-spouse-absent", "Married-AF-spouse"],
198
- label="Marital Status"
199
- ),
200
  gr.Dropdown(
201
  ["Tech-support", "Craft-repair", "Other-service", "Sales",
202
  "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
@@ -205,10 +206,6 @@ ann_inputs = [
205
  "Armed-Forces"],
206
  label="Occupation"
207
  ),
208
- gr.Dropdown(
209
- ["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
210
- label="Relationship"
211
- ),
212
  gr.Dropdown(
213
  ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
214
  label="Race"
@@ -238,11 +235,6 @@ rf_inputs = [
238
  "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
239
  label="Education"
240
  ),
241
- gr.Dropdown(
242
- ["Married-civ-spouse", "Divorced", "Never-married", "Separated",
243
- "Widowed", "Married-spouse-absent", "Married-AF-spouse"],
244
- label="Marital Status"
245
- ),
246
  gr.Dropdown(
247
  ["Tech-support", "Craft-repair", "Other-service", "Sales",
248
  "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
@@ -251,10 +243,6 @@ rf_inputs = [
251
  "Armed-Forces"],
252
  label="Occupation"
253
  ),
254
- gr.Dropdown(
255
- ["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
256
- label="Relationship"
257
- ),
258
  gr.Dropdown(
259
  ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
260
  label="Race"
@@ -284,11 +272,6 @@ hbd_inputs = [
284
  "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
285
  label="Education"
286
  ),
287
- gr.Dropdown(
288
- ["Married-civ-spouse", "Divorced", "Never-married", "Separated",
289
- "Widowed", "Married-spouse-absent", "Married-AF-spouse"],
290
- label="Marital Status"
291
- ),
292
  gr.Dropdown(
293
  ["Tech-support", "Craft-repair", "Other-service", "Sales",
294
  "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
@@ -297,10 +280,6 @@ hbd_inputs = [
297
  "Armed-Forces"],
298
  label="Occupation"
299
  ),
300
- gr.Dropdown(
301
- ["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
302
- label="Relationship"
303
- ),
304
  gr.Dropdown(
305
  ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
306
  label="Race"
 
13
 
14
 
15
  # # Define the prediction function
16
+ def predict_ann(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
17
  # columns = {
18
  # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
19
  # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
 
23
  "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
24
  "hours-per-week":[hours_per_week], "native-country":[native_country]}
25
  df = pd.DataFrame(data=columns)
26
+ fixed_features = cleaning_features(df,race,False)
27
  print(fixed_features)
28
  # with open('ann_model.pkl', 'rb') as ann_model_file:
29
  # ann_model = pickle.load(ann_model_file)
 
33
  # prediction = 1
34
  return "Income >50K" if prediction == 1 else "Income <=50K"
35
 
36
+ def predict_rf(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
37
  # columns = {
38
  # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
39
  # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
 
43
  "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
44
  "hours-per-week":[hours_per_week], "native-country":[native_country]}
45
  df = pd.DataFrame(data=columns)
46
+ fixed_features = cleaning_features(df,race,False)
47
  print(fixed_features)
48
  # with open('ann_model.pkl', 'rb') as ann_model_file:
49
  # ann_model = pickle.load(ann_model_file)
 
53
  # prediction = 1
54
  return "Income >50K" if prediction == 1 else "Income <=50K"
55
 
56
+ def predict_hb(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
57
  # columns = {
58
  # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
59
  # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
 
63
  "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
64
  "hours-per-week":[hours_per_week], "native-country":[native_country]}
65
  df = pd.DataFrame(data=columns)
66
+ fixed_features = cleaning_features(df,race,True)
67
  print(fixed_features)
68
  # with open('ann_model.pkl', 'rb') as ann_model_file:
69
  # ann_model = pickle.load(ann_model_file)
 
75
  return f"Predicted Cluster (HDBSCAN): {prediction}"
76
 
77
 
78
+ def cleaning_features(data,race,hdbscan):
79
  # with open('race_onehot_encoder.pkl', 'rb') as enc_file:
80
  # encoder = pickle.load(enc_file)
81
 
 
135
  data = data.drop(columns=['race'])
136
 
137
  data = pca(data)
138
+ if(hdbscan):
139
+ data['capital-gain'] = np.log1p(data['capital-gain'])
140
+ data['capital-loss'] = np.log1p(data['capital-loss'])
141
+ scaler = joblib.load("robust_scaler.pkl")
142
+ numerical_features = ['age', 'capital-gain', 'capital-loss', 'hours-per-week']
143
+ data[numerical_features] = scaler.transform(data[numerical_features])
144
  return data
145
 
146
  # def pca(data):
 
198
  "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
199
  label="Education"
200
  ),
 
 
 
 
 
201
  gr.Dropdown(
202
  ["Tech-support", "Craft-repair", "Other-service", "Sales",
203
  "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
 
206
  "Armed-Forces"],
207
  label="Occupation"
208
  ),
 
 
 
 
209
  gr.Dropdown(
210
  ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
211
  label="Race"
 
235
  "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
236
  label="Education"
237
  ),
 
 
 
 
 
238
  gr.Dropdown(
239
  ["Tech-support", "Craft-repair", "Other-service", "Sales",
240
  "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
 
243
  "Armed-Forces"],
244
  label="Occupation"
245
  ),
 
 
 
 
246
  gr.Dropdown(
247
  ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
248
  label="Race"
 
272
  "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
273
  label="Education"
274
  ),
 
 
 
 
 
275
  gr.Dropdown(
276
  ["Tech-support", "Craft-repair", "Other-service", "Sales",
277
  "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
 
280
  "Armed-Forces"],
281
  label="Occupation"
282
  ),
 
 
 
 
283
  gr.Dropdown(
284
  ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
285
  label="Race"