matsammut committed on
Commit
15350d5
·
verified ·
1 Parent(s): 4662aaf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -1
app.py CHANGED
@@ -11,7 +11,12 @@ from sklearn.decomposition import PCA
11
 
12
  # # Define the prediction function
13
  def predict(age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
14
- features = np.array([age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country])
 
 
 
 
 
15
  fixed_features = cleaning_features(features)
16
  # prediction = model.predict(features)
17
  # prediction = 1
@@ -25,11 +30,13 @@ def cleaning_features(data):
25
  numeric_cols = ['age', 'educational-num', 'hours-per-week']
26
  columns_to_encode = ['race','marital-status','relationship']
27
 
 
28
  # 1. Scale numerical features
29
  data[numeric_cols] = scaler.fit_transform(data[numeric_cols])
30
 
31
  # 2. Label encode gender and income
32
  data['gender'] = le.fit_transform(data['gender'])
 
33
 
34
  # 3. One-hot encode race
35
  for N in columns_to_encode:
 
11
 
12
  # # Define the prediction function
13
  def predict(age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
14
+ features = [age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country]
15
+ columns = [
16
+ "age", "workclass", "education", "marital_status", "occupation",
17
+ "relationship", "race", "gender", "capital_gain", "capital_loss",
18
+ "hours_per_week", "native_country"]
19
+ df = pd.DataFrame(features, columns=columns)
20
  fixed_features = cleaning_features(features)
21
  # prediction = model.predict(features)
22
  # prediction = 1
 
30
  numeric_cols = ['age', 'educational-num', 'hours-per-week']
31
  columns_to_encode = ['race','marital-status','relationship']
32
 
33
+
34
  # 1. Scale numerical features
35
  data[numeric_cols] = scaler.fit_transform(data[numeric_cols])
36
 
37
  # 2. Label encode gender and income
38
  data['gender'] = le.fit_transform(data['gender'])
39
+ data['education-num'] = le.fit_transform(data['education'])
40
 
41
  # 3. One-hot encode race
42
  for N in columns_to_encode: