Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -32,6 +32,8 @@ def predict(age, workclass, education, marital_status, occupation, relationship,
|
|
32 |
return "Income >50K" if prediction == 1 else "Income <=50K"
|
33 |
|
34 |
def cleaning_features(data):
|
|
|
|
|
35 |
|
36 |
with open('label_encoder_work.pkl', 'rb') as le_file:
|
37 |
le_work = pickle.load(le_file)
|
@@ -64,7 +66,8 @@ def cleaning_features(data):
|
|
64 |
country_mapping = {"United-States":1,"Other":0}
|
65 |
|
66 |
numeric_cols = ['age', 'educational-num', 'hours-per-week']
|
67 |
-
columns_to_encode = ['race','marital-status','relationship']
|
|
|
68 |
|
69 |
data['workclass'] = le_work.transform(data['workclass'])
|
70 |
data['occupation'] = le_occ.transform(data['occupation'])
|
@@ -74,6 +77,13 @@ def cleaning_features(data):
|
|
74 |
|
75 |
data[numeric_cols] = scaler.transform(data[numeric_cols])
|
76 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
data = pca(data)
|
78 |
return data
|
79 |
|
|
|
32 |
return "Income >50K" if prediction == 1 else "Income <=50K"
|
33 |
|
34 |
def cleaning_features(data):
|
35 |
+
with open('race_onehot_encoder.pkl', 'rb') as enc_file:
|
36 |
+
encoder = pickle.load(enc_file)
|
37 |
|
38 |
with open('label_encoder_work.pkl', 'rb') as le_file:
|
39 |
le_work = pickle.load(le_file)
|
|
|
66 |
country_mapping = {"United-States":1,"Other":0}
|
67 |
|
68 |
numeric_cols = ['age', 'educational-num', 'hours-per-week']
|
69 |
+
# columns_to_encode = ['race','marital-status','relationship']
|
70 |
+
columns_to_encode = ['race']
|
71 |
|
72 |
data['workclass'] = le_work.transform(data['workclass'])
|
73 |
data['occupation'] = le_occ.transform(data['occupation'])
|
|
|
77 |
|
78 |
data[numeric_cols] = scaler.transform(data[numeric_cols])
|
79 |
|
80 |
+
for N in columns_to_encode:
|
81 |
+
race_encoded = encoder.transform(data[[N]])
|
82 |
+
race_encoded_cols = encoder.get_feature_names_out([N])
|
83 |
+
race_encoded_df = pd.DataFrame(race_encoded, columns=race_encoded_cols, index=data.index)
|
84 |
+
# Combine the encoded data with original dataframe
|
85 |
+
data = pd.concat([data.drop(N, axis=1), race_encoded_df], axis=1)
|
86 |
+
|
87 |
data = pca(data)
|
88 |
return data
|
89 |
|