Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -13,7 +13,7 @@ import hdbscan
|
|
13 |
|
14 |
|
15 |
# # Define the prediction function
|
16 |
-
def predict_ann(age, workclass, education,
|
17 |
# columns = {
|
18 |
# "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
|
19 |
# "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
@@ -23,7 +23,7 @@ def predict_ann(age, workclass, education, marital_status, occupation, relations
|
|
23 |
"race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
24 |
"hours-per-week":[hours_per_week], "native-country":[native_country]}
|
25 |
df = pd.DataFrame(data=columns)
|
26 |
-
fixed_features = cleaning_features(df,race)
|
27 |
print(fixed_features)
|
28 |
# with open('ann_model.pkl', 'rb') as ann_model_file:
|
29 |
# ann_model = pickle.load(ann_model_file)
|
@@ -33,7 +33,7 @@ def predict_ann(age, workclass, education, marital_status, occupation, relations
|
|
33 |
# prediction = 1
|
34 |
return "Income >50K" if prediction == 1 else "Income <=50K"
|
35 |
|
36 |
-
def predict_rf(age, workclass, education,
|
37 |
# columns = {
|
38 |
# "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
|
39 |
# "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
@@ -43,7 +43,7 @@ def predict_rf(age, workclass, education, marital_status, occupation, relationsh
|
|
43 |
"race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
44 |
"hours-per-week":[hours_per_week], "native-country":[native_country]}
|
45 |
df = pd.DataFrame(data=columns)
|
46 |
-
fixed_features = cleaning_features(df,race)
|
47 |
print(fixed_features)
|
48 |
# with open('ann_model.pkl', 'rb') as ann_model_file:
|
49 |
# ann_model = pickle.load(ann_model_file)
|
@@ -53,7 +53,7 @@ def predict_rf(age, workclass, education, marital_status, occupation, relationsh
|
|
53 |
# prediction = 1
|
54 |
return "Income >50K" if prediction == 1 else "Income <=50K"
|
55 |
|
56 |
-
def predict_hb(age, workclass, education,
|
57 |
# columns = {
|
58 |
# "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
|
59 |
# "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
@@ -63,7 +63,7 @@ def predict_hb(age, workclass, education, marital_status, occupation, relationsh
|
|
63 |
"race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
64 |
"hours-per-week":[hours_per_week], "native-country":[native_country]}
|
65 |
df = pd.DataFrame(data=columns)
|
66 |
-
fixed_features = cleaning_features(df,race)
|
67 |
print(fixed_features)
|
68 |
# with open('ann_model.pkl', 'rb') as ann_model_file:
|
69 |
# ann_model = pickle.load(ann_model_file)
|
@@ -75,7 +75,7 @@ def predict_hb(age, workclass, education, marital_status, occupation, relationsh
|
|
75 |
return f"Predicted Cluster (HDBSCAN): {prediction}"
|
76 |
|
77 |
|
78 |
-
def cleaning_features(data,race):
|
79 |
# with open('race_onehot_encoder.pkl', 'rb') as enc_file:
|
80 |
# encoder = pickle.load(enc_file)
|
81 |
|
@@ -135,6 +135,12 @@ def cleaning_features(data,race):
|
|
135 |
data = data.drop(columns=['race'])
|
136 |
|
137 |
data = pca(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
return data
|
139 |
|
140 |
# def pca(data):
|
@@ -192,11 +198,6 @@ ann_inputs = [
|
|
192 |
"1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
|
193 |
label="Education"
|
194 |
),
|
195 |
-
gr.Dropdown(
|
196 |
-
["Married-civ-spouse", "Divorced", "Never-married", "Separated",
|
197 |
-
"Widowed", "Married-spouse-absent", "Married-AF-spouse"],
|
198 |
-
label="Marital Status"
|
199 |
-
),
|
200 |
gr.Dropdown(
|
201 |
["Tech-support", "Craft-repair", "Other-service", "Sales",
|
202 |
"Exec-managerial", "Prof-specialty", "Handlers-cleaners",
|
@@ -205,10 +206,6 @@ ann_inputs = [
|
|
205 |
"Armed-Forces"],
|
206 |
label="Occupation"
|
207 |
),
|
208 |
-
gr.Dropdown(
|
209 |
-
["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
|
210 |
-
label="Relationship"
|
211 |
-
),
|
212 |
gr.Dropdown(
|
213 |
["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
|
214 |
label="Race"
|
@@ -238,11 +235,6 @@ rf_inputs = [
|
|
238 |
"1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
|
239 |
label="Education"
|
240 |
),
|
241 |
-
gr.Dropdown(
|
242 |
-
["Married-civ-spouse", "Divorced", "Never-married", "Separated",
|
243 |
-
"Widowed", "Married-spouse-absent", "Married-AF-spouse"],
|
244 |
-
label="Marital Status"
|
245 |
-
),
|
246 |
gr.Dropdown(
|
247 |
["Tech-support", "Craft-repair", "Other-service", "Sales",
|
248 |
"Exec-managerial", "Prof-specialty", "Handlers-cleaners",
|
@@ -251,10 +243,6 @@ rf_inputs = [
|
|
251 |
"Armed-Forces"],
|
252 |
label="Occupation"
|
253 |
),
|
254 |
-
gr.Dropdown(
|
255 |
-
["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
|
256 |
-
label="Relationship"
|
257 |
-
),
|
258 |
gr.Dropdown(
|
259 |
["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
|
260 |
label="Race"
|
@@ -284,11 +272,6 @@ hbd_inputs = [
|
|
284 |
"1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
|
285 |
label="Education"
|
286 |
),
|
287 |
-
gr.Dropdown(
|
288 |
-
["Married-civ-spouse", "Divorced", "Never-married", "Separated",
|
289 |
-
"Widowed", "Married-spouse-absent", "Married-AF-spouse"],
|
290 |
-
label="Marital Status"
|
291 |
-
),
|
292 |
gr.Dropdown(
|
293 |
["Tech-support", "Craft-repair", "Other-service", "Sales",
|
294 |
"Exec-managerial", "Prof-specialty", "Handlers-cleaners",
|
@@ -297,10 +280,6 @@ hbd_inputs = [
|
|
297 |
"Armed-Forces"],
|
298 |
label="Occupation"
|
299 |
),
|
300 |
-
gr.Dropdown(
|
301 |
-
["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
|
302 |
-
label="Relationship"
|
303 |
-
),
|
304 |
gr.Dropdown(
|
305 |
["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
|
306 |
label="Race"
|
|
|
13 |
|
14 |
|
15 |
# # Define the prediction function
|
16 |
+
def predict_ann(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
|
17 |
# columns = {
|
18 |
# "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
|
19 |
# "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
|
|
23 |
"race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
24 |
"hours-per-week":[hours_per_week], "native-country":[native_country]}
|
25 |
df = pd.DataFrame(data=columns)
|
26 |
+
fixed_features = cleaning_features(df,race,False)
|
27 |
print(fixed_features)
|
28 |
# with open('ann_model.pkl', 'rb') as ann_model_file:
|
29 |
# ann_model = pickle.load(ann_model_file)
|
|
|
33 |
# prediction = 1
|
34 |
return "Income >50K" if prediction == 1 else "Income <=50K"
|
35 |
|
36 |
+
def predict_rf(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
|
37 |
# columns = {
|
38 |
# "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
|
39 |
# "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
|
|
43 |
"race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
44 |
"hours-per-week":[hours_per_week], "native-country":[native_country]}
|
45 |
df = pd.DataFrame(data=columns)
|
46 |
+
fixed_features = cleaning_features(df,race,False)
|
47 |
print(fixed_features)
|
48 |
# with open('ann_model.pkl', 'rb') as ann_model_file:
|
49 |
# ann_model = pickle.load(ann_model_file)
|
|
|
53 |
# prediction = 1
|
54 |
return "Income >50K" if prediction == 1 else "Income <=50K"
|
55 |
|
56 |
+
def predict_hb(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
|
57 |
# columns = {
|
58 |
# "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
|
59 |
# "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
|
|
63 |
"race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
64 |
"hours-per-week":[hours_per_week], "native-country":[native_country]}
|
65 |
df = pd.DataFrame(data=columns)
|
66 |
+
fixed_features = cleaning_features(df,race,True)
|
67 |
print(fixed_features)
|
68 |
# with open('ann_model.pkl', 'rb') as ann_model_file:
|
69 |
# ann_model = pickle.load(ann_model_file)
|
|
|
75 |
return f"Predicted Cluster (HDBSCAN): {prediction}"
|
76 |
|
77 |
|
78 |
+
def cleaning_features(data,race,hdbscan):
|
79 |
# with open('race_onehot_encoder.pkl', 'rb') as enc_file:
|
80 |
# encoder = pickle.load(enc_file)
|
81 |
|
|
|
135 |
data = data.drop(columns=['race'])
|
136 |
|
137 |
data = pca(data)
|
138 |
+
if(hdbscan):
|
139 |
+
data['capital-gain'] = np.log1p(data['capital-gain'])
|
140 |
+
data['capital-loss'] = np.log1p(data['capital-loss'])
|
141 |
+
scaler = joblib.load("robust_scaler.pkl")
|
142 |
+
numerical_features = ['age', 'capital-gain', 'capital-loss', 'hours-per-week']
|
143 |
+
data[numerical_features] = scaler.transform(data[numerical_features])
|
144 |
return data
|
145 |
|
146 |
# def pca(data):
|
|
|
198 |
"1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
|
199 |
label="Education"
|
200 |
),
|
|
|
|
|
|
|
|
|
|
|
201 |
gr.Dropdown(
|
202 |
["Tech-support", "Craft-repair", "Other-service", "Sales",
|
203 |
"Exec-managerial", "Prof-specialty", "Handlers-cleaners",
|
|
|
206 |
"Armed-Forces"],
|
207 |
label="Occupation"
|
208 |
),
|
|
|
|
|
|
|
|
|
209 |
gr.Dropdown(
|
210 |
["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
|
211 |
label="Race"
|
|
|
235 |
"1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
|
236 |
label="Education"
|
237 |
),
|
|
|
|
|
|
|
|
|
|
|
238 |
gr.Dropdown(
|
239 |
["Tech-support", "Craft-repair", "Other-service", "Sales",
|
240 |
"Exec-managerial", "Prof-specialty", "Handlers-cleaners",
|
|
|
243 |
"Armed-Forces"],
|
244 |
label="Occupation"
|
245 |
),
|
|
|
|
|
|
|
|
|
246 |
gr.Dropdown(
|
247 |
["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
|
248 |
label="Race"
|
|
|
272 |
"1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
|
273 |
label="Education"
|
274 |
),
|
|
|
|
|
|
|
|
|
|
|
275 |
gr.Dropdown(
|
276 |
["Tech-support", "Craft-repair", "Other-service", "Sales",
|
277 |
"Exec-managerial", "Prof-specialty", "Handlers-cleaners",
|
|
|
280 |
"Armed-Forces"],
|
281 |
label="Occupation"
|
282 |
),
|
|
|
|
|
|
|
|
|
283 |
gr.Dropdown(
|
284 |
["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
|
285 |
label="Race"
|