matsammut committed on
Commit
b651e33
·
verified ·
1 Parent(s): a4c0920

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -1
app.py CHANGED
@@ -32,6 +32,8 @@ def predict(age, workclass, education, marital_status, occupation, relationship,
32
  return "Income >50K" if prediction == 1 else "Income <=50K"
33
 
34
  def cleaning_features(data):
 
 
35
 
36
  with open('label_encoder_work.pkl', 'rb') as le_file:
37
  le_work = pickle.load(le_file)
@@ -64,7 +66,8 @@ def cleaning_features(data):
64
  country_mapping = {"United-States":1,"Other":0}
65
 
66
  numeric_cols = ['age', 'educational-num', 'hours-per-week']
67
- columns_to_encode = ['race','marital-status','relationship']
 
68
 
69
  data['workclass'] = le_work.transform(data['workclass'])
70
  data['occupation'] = le_occ.transform(data['occupation'])
@@ -74,6 +77,13 @@ def cleaning_features(data):
74
 
75
  data[numeric_cols] = scaler.transform(data[numeric_cols])
76
 
 
 
 
 
 
 
 
77
  data = pca(data)
78
  return data
79
 
 
32
  return "Income >50K" if prediction == 1 else "Income <=50K"
33
 
34
  def cleaning_features(data):
35
+ with open('race_onehot_encoder.pkl', 'rb') as enc_file:
36
+ encoder = pickle.load(enc_file)
37
 
38
  with open('label_encoder_work.pkl', 'rb') as le_file:
39
  le_work = pickle.load(le_file)
 
66
  country_mapping = {"United-States":1,"Other":0}
67
 
68
  numeric_cols = ['age', 'educational-num', 'hours-per-week']
69
+ # columns_to_encode = ['race','marital-status','relationship']
70
+ columns_to_encode = ['race']
71
 
72
  data['workclass'] = le_work.transform(data['workclass'])
73
  data['occupation'] = le_occ.transform(data['occupation'])
 
77
 
78
  data[numeric_cols] = scaler.transform(data[numeric_cols])
79
 
80
+ for N in columns_to_encode:
81
+ race_encoded = encoder.transform(data[[N]])
82
+ race_encoded_cols = encoder.get_feature_names_out([N])
83
+ race_encoded_df = pd.DataFrame(race_encoded, columns=race_encoded_cols, index=data.index)
84
+ # Combine the encoded data with original dataframe
85
+ data = pd.concat([data.drop(N, axis=1), race_encoded_df], axis=1)
86
+
87
  data = pca(data)
88
  return data
89