Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -13,7 +13,7 @@ import hdbscan
|
|
| 13 |
|
| 14 |
|
| 15 |
# # Define the prediction function
|
| 16 |
-
def predict_ann(age, workclass, education,
|
| 17 |
# columns = {
|
| 18 |
# "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
|
| 19 |
# "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
|
@@ -23,7 +23,7 @@ def predict_ann(age, workclass, education, marital_status, occupation, relations
|
|
| 23 |
"race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
| 24 |
"hours-per-week":[hours_per_week], "native-country":[native_country]}
|
| 25 |
df = pd.DataFrame(data=columns)
|
| 26 |
-
fixed_features = cleaning_features(df,race)
|
| 27 |
print(fixed_features)
|
| 28 |
# with open('ann_model.pkl', 'rb') as ann_model_file:
|
| 29 |
# ann_model = pickle.load(ann_model_file)
|
|
@@ -33,7 +33,7 @@ def predict_ann(age, workclass, education, marital_status, occupation, relations
|
|
| 33 |
# prediction = 1
|
| 34 |
return "Income >50K" if prediction == 1 else "Income <=50K"
|
| 35 |
|
| 36 |
-
def predict_rf(age, workclass, education,
|
| 37 |
# columns = {
|
| 38 |
# "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
|
| 39 |
# "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
|
@@ -43,7 +43,7 @@ def predict_rf(age, workclass, education, marital_status, occupation, relationsh
|
|
| 43 |
"race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
| 44 |
"hours-per-week":[hours_per_week], "native-country":[native_country]}
|
| 45 |
df = pd.DataFrame(data=columns)
|
| 46 |
-
fixed_features = cleaning_features(df,race)
|
| 47 |
print(fixed_features)
|
| 48 |
# with open('ann_model.pkl', 'rb') as ann_model_file:
|
| 49 |
# ann_model = pickle.load(ann_model_file)
|
|
@@ -53,7 +53,7 @@ def predict_rf(age, workclass, education, marital_status, occupation, relationsh
|
|
| 53 |
# prediction = 1
|
| 54 |
return "Income >50K" if prediction == 1 else "Income <=50K"
|
| 55 |
|
| 56 |
-
def predict_hb(age, workclass, education,
|
| 57 |
# columns = {
|
| 58 |
# "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
|
| 59 |
# "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
|
@@ -63,7 +63,7 @@ def predict_hb(age, workclass, education, marital_status, occupation, relationsh
|
|
| 63 |
"race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
| 64 |
"hours-per-week":[hours_per_week], "native-country":[native_country]}
|
| 65 |
df = pd.DataFrame(data=columns)
|
| 66 |
-
fixed_features = cleaning_features(df,race)
|
| 67 |
print(fixed_features)
|
| 68 |
# with open('ann_model.pkl', 'rb') as ann_model_file:
|
| 69 |
# ann_model = pickle.load(ann_model_file)
|
|
@@ -75,7 +75,7 @@ def predict_hb(age, workclass, education, marital_status, occupation, relationsh
|
|
| 75 |
return f"Predicted Cluster (HDBSCAN): {prediction}"
|
| 76 |
|
| 77 |
|
| 78 |
-
def cleaning_features(data,race):
|
| 79 |
# with open('race_onehot_encoder.pkl', 'rb') as enc_file:
|
| 80 |
# encoder = pickle.load(enc_file)
|
| 81 |
|
|
@@ -135,6 +135,12 @@ def cleaning_features(data,race):
|
|
| 135 |
data = data.drop(columns=['race'])
|
| 136 |
|
| 137 |
data = pca(data)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
return data
|
| 139 |
|
| 140 |
# def pca(data):
|
|
@@ -192,11 +198,6 @@ ann_inputs = [
|
|
| 192 |
"1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
|
| 193 |
label="Education"
|
| 194 |
),
|
| 195 |
-
gr.Dropdown(
|
| 196 |
-
["Married-civ-spouse", "Divorced", "Never-married", "Separated",
|
| 197 |
-
"Widowed", "Married-spouse-absent", "Married-AF-spouse"],
|
| 198 |
-
label="Marital Status"
|
| 199 |
-
),
|
| 200 |
gr.Dropdown(
|
| 201 |
["Tech-support", "Craft-repair", "Other-service", "Sales",
|
| 202 |
"Exec-managerial", "Prof-specialty", "Handlers-cleaners",
|
|
@@ -205,10 +206,6 @@ ann_inputs = [
|
|
| 205 |
"Armed-Forces"],
|
| 206 |
label="Occupation"
|
| 207 |
),
|
| 208 |
-
gr.Dropdown(
|
| 209 |
-
["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
|
| 210 |
-
label="Relationship"
|
| 211 |
-
),
|
| 212 |
gr.Dropdown(
|
| 213 |
["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
|
| 214 |
label="Race"
|
|
@@ -238,11 +235,6 @@ rf_inputs = [
|
|
| 238 |
"1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
|
| 239 |
label="Education"
|
| 240 |
),
|
| 241 |
-
gr.Dropdown(
|
| 242 |
-
["Married-civ-spouse", "Divorced", "Never-married", "Separated",
|
| 243 |
-
"Widowed", "Married-spouse-absent", "Married-AF-spouse"],
|
| 244 |
-
label="Marital Status"
|
| 245 |
-
),
|
| 246 |
gr.Dropdown(
|
| 247 |
["Tech-support", "Craft-repair", "Other-service", "Sales",
|
| 248 |
"Exec-managerial", "Prof-specialty", "Handlers-cleaners",
|
|
@@ -251,10 +243,6 @@ rf_inputs = [
|
|
| 251 |
"Armed-Forces"],
|
| 252 |
label="Occupation"
|
| 253 |
),
|
| 254 |
-
gr.Dropdown(
|
| 255 |
-
["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
|
| 256 |
-
label="Relationship"
|
| 257 |
-
),
|
| 258 |
gr.Dropdown(
|
| 259 |
["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
|
| 260 |
label="Race"
|
|
@@ -284,11 +272,6 @@ hbd_inputs = [
|
|
| 284 |
"1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
|
| 285 |
label="Education"
|
| 286 |
),
|
| 287 |
-
gr.Dropdown(
|
| 288 |
-
["Married-civ-spouse", "Divorced", "Never-married", "Separated",
|
| 289 |
-
"Widowed", "Married-spouse-absent", "Married-AF-spouse"],
|
| 290 |
-
label="Marital Status"
|
| 291 |
-
),
|
| 292 |
gr.Dropdown(
|
| 293 |
["Tech-support", "Craft-repair", "Other-service", "Sales",
|
| 294 |
"Exec-managerial", "Prof-specialty", "Handlers-cleaners",
|
|
@@ -297,10 +280,6 @@ hbd_inputs = [
|
|
| 297 |
"Armed-Forces"],
|
| 298 |
label="Occupation"
|
| 299 |
),
|
| 300 |
-
gr.Dropdown(
|
| 301 |
-
["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
|
| 302 |
-
label="Relationship"
|
| 303 |
-
),
|
| 304 |
gr.Dropdown(
|
| 305 |
["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
|
| 306 |
label="Race"
|
|
|
|
| 13 |
|
| 14 |
|
| 15 |
# # Define the prediction function
|
| 16 |
+
def predict_ann(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
|
| 17 |
# columns = {
|
| 18 |
# "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
|
| 19 |
# "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
|
|
|
| 23 |
"race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
| 24 |
"hours-per-week":[hours_per_week], "native-country":[native_country]}
|
| 25 |
df = pd.DataFrame(data=columns)
|
| 26 |
+
fixed_features = cleaning_features(df,race,False)
|
| 27 |
print(fixed_features)
|
| 28 |
# with open('ann_model.pkl', 'rb') as ann_model_file:
|
| 29 |
# ann_model = pickle.load(ann_model_file)
|
|
|
|
| 33 |
# prediction = 1
|
| 34 |
return "Income >50K" if prediction == 1 else "Income <=50K"
|
| 35 |
|
| 36 |
+
def predict_rf(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
|
| 37 |
# columns = {
|
| 38 |
# "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
|
| 39 |
# "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
|
|
|
| 43 |
"race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
| 44 |
"hours-per-week":[hours_per_week], "native-country":[native_country]}
|
| 45 |
df = pd.DataFrame(data=columns)
|
| 46 |
+
fixed_features = cleaning_features(df,race,False)
|
| 47 |
print(fixed_features)
|
| 48 |
# with open('ann_model.pkl', 'rb') as ann_model_file:
|
| 49 |
# ann_model = pickle.load(ann_model_file)
|
|
|
|
| 53 |
# prediction = 1
|
| 54 |
return "Income >50K" if prediction == 1 else "Income <=50K"
|
| 55 |
|
| 56 |
+
def predict_hb(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
|
| 57 |
# columns = {
|
| 58 |
# "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
|
| 59 |
# "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
|
|
|
| 63 |
"race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
|
| 64 |
"hours-per-week":[hours_per_week], "native-country":[native_country]}
|
| 65 |
df = pd.DataFrame(data=columns)
|
| 66 |
+
fixed_features = cleaning_features(df,race,True)
|
| 67 |
print(fixed_features)
|
| 68 |
# with open('ann_model.pkl', 'rb') as ann_model_file:
|
| 69 |
# ann_model = pickle.load(ann_model_file)
|
|
|
|
| 75 |
return f"Predicted Cluster (HDBSCAN): {prediction}"
|
| 76 |
|
| 77 |
|
| 78 |
+
def cleaning_features(data,race,hdbscan):
|
| 79 |
# with open('race_onehot_encoder.pkl', 'rb') as enc_file:
|
| 80 |
# encoder = pickle.load(enc_file)
|
| 81 |
|
|
|
|
| 135 |
data = data.drop(columns=['race'])
|
| 136 |
|
| 137 |
data = pca(data)
|
| 138 |
+
if(hdbscan):
|
| 139 |
+
data['capital-gain'] = np.log1p(data['capital-gain'])
|
| 140 |
+
data['capital-loss'] = np.log1p(data['capital-loss'])
|
| 141 |
+
scaler = joblib.load("robust_scaler.pkl")
|
| 142 |
+
numerical_features = ['age', 'capital-gain', 'capital-loss', 'hours-per-week']
|
| 143 |
+
data[numerical_features] = scaler.transform(data[numerical_features])
|
| 144 |
return data
|
| 145 |
|
| 146 |
# def pca(data):
|
|
|
|
| 198 |
"1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
|
| 199 |
label="Education"
|
| 200 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
gr.Dropdown(
|
| 202 |
["Tech-support", "Craft-repair", "Other-service", "Sales",
|
| 203 |
"Exec-managerial", "Prof-specialty", "Handlers-cleaners",
|
|
|
|
| 206 |
"Armed-Forces"],
|
| 207 |
label="Occupation"
|
| 208 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
gr.Dropdown(
|
| 210 |
["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
|
| 211 |
label="Race"
|
|
|
|
| 235 |
"1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
|
| 236 |
label="Education"
|
| 237 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
gr.Dropdown(
|
| 239 |
["Tech-support", "Craft-repair", "Other-service", "Sales",
|
| 240 |
"Exec-managerial", "Prof-specialty", "Handlers-cleaners",
|
|
|
|
| 243 |
"Armed-Forces"],
|
| 244 |
label="Occupation"
|
| 245 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
gr.Dropdown(
|
| 247 |
["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
|
| 248 |
label="Race"
|
|
|
|
| 272 |
"1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
|
| 273 |
label="Education"
|
| 274 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 275 |
gr.Dropdown(
|
| 276 |
["Tech-support", "Craft-repair", "Other-service", "Sales",
|
| 277 |
"Exec-managerial", "Prof-specialty", "Handlers-cleaners",
|
|
|
|
| 280 |
"Armed-Forces"],
|
| 281 |
label="Occupation"
|
| 282 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 283 |
gr.Dropdown(
|
| 284 |
["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
|
| 285 |
label="Race"
|