matsammut committed on
Commit
932646c
·
verified ·
1 Parent(s): 008605e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -32,8 +32,8 @@ def predict(age, workclass, education, marital_status, occupation, relationship,
32
  return "Income >50K" if prediction == 1 else "Income <=50K"
33
 
34
  def cleaning_features(data):
35
- with open('race_onehot_encoder.pkl', 'rb') as enc_file:
36
- encoder = pickle.load(enc_file)
37
 
38
  with open('label_encoder_work.pkl', 'rb') as le_file:
39
  le_work = pickle.load(le_file)
@@ -61,7 +61,7 @@ def cleaning_features(data):
61
  "Doctorate": 15,
62
  "Prof-school": 16
63
  }
64
-
65
  gender_mapping = {"Male":1,"Female":0}
66
  country_mapping = {"United-States":1,"Other":0}
67
 
@@ -77,12 +77,18 @@ def cleaning_features(data):
77
 
78
  data[numeric_cols] = scaler.transform(data[numeric_cols])
79
 
80
- for N in columns_to_encode:
81
- race_encoded = encoder.transform(data[[N]])
82
- race_encoded_cols = encoder.get_feature_names_out([N])
83
- race_encoded_df = pd.DataFrame(race_encoded, columns=race_encoded_cols, index=data.index)
84
- # Combine the encoded data with original dataframe
85
- data = pd.concat([data.drop(N, axis=1), race_encoded_df], axis=1)
 
 
 
 
 
 
86
 
87
  data = pca(data)
88
  return data
 
32
  return "Income >50K" if prediction == 1 else "Income <=50K"
33
 
34
  def cleaning_features(data):
35
+ # with open('race_onehot_encoder.pkl', 'rb') as enc_file:
36
+ # encoder = pickle.load(enc_file)
37
 
38
  with open('label_encoder_work.pkl', 'rb') as le_file:
39
  le_work = pickle.load(le_file)
 
61
  "Doctorate": 15,
62
  "Prof-school": 16
63
  }
64
+ race_categories = ["Amer-Indian-Eskimo", "Asian-Pac-Islander","Black", "Other","White"]
65
  gender_mapping = {"Male":1,"Female":0}
66
  country_mapping = {"United-States":1,"Other":0}
67
 
 
77
 
78
  data[numeric_cols] = scaler.transform(data[numeric_cols])
79
 
80
+ for races in race_categories:
81
+ if data['race'] == race:
82
+ df[f'race_{races}'] = 1
83
+ else:
84
+ df[f'race_{races}'] = 0
85
+ # for N in columns_to_encode:
86
+ # race_encoded = encoder.transform(data[[N]])
87
+ # race_encoded_cols = encoder.get_feature_names_out([N])
88
+ # race_encoded_df = pd.DataFrame(race_encoded, columns=race_encoded_cols, index=data.index)
89
+ # # Combine the encoded data with original dataframe
90
+ # data = pd.concat([data.drop(N, axis=1), race_encoded_df], axis=1)
91
+ df = df.drop(columns=['race'])
92
 
93
  data = pca(data)
94
  return data