matsammut committed on
Commit
3c78fe7
·
verified ·
1 Parent(s): 1d2307b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -0
app.py CHANGED
@@ -1,5 +1,9 @@
1
  import gradio as gr
2
  import joblib
 
 
 
 
3
 
4
  # Load your saved model
5
  model = joblib.load("ann_model.joblib")
@@ -11,6 +15,36 @@ def predict(age, workclass, education, marital_status, occupation, relationship,
11
  prediction = 1
12
  return "Income >50K" if prediction == 1 else "Income <=50K"
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  # Create the Gradio interface
15
  interface = gr.Interface(
16
  fn=predict,
 
1
  import gradio as gr
2
  import joblib
3
+ import pandas as pd
4
+ import numpy as np
5
+ from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
6
+ from sklearn.impute import KNNImputer
7
 
8
  # Load your saved model
9
  model = joblib.load("ann_model.joblib")
 
15
  prediction = 1
16
  return "Income >50K" if prediction == 1 else "Income <=50K"
17
 
18
def cleaning_features(data):
    """Scale, encode and binarize the raw feature columns of ``data``.

    Steps, in order:
      1. Replace the placeholder values ``'?'`` and ``99999`` with NaN.
      2. Standard-scale ``age``, ``educational-num`` and ``hours-per-week``.
      3. Label-encode ``gender``.
      4. One-hot encode ``race``, ``marital-status`` and ``relationship``;
         the generated columns are appended after the remaining columns.
      5. Turn ``native-country`` into 1 for ``'United-States'`` and 0 otherwise.

    All transformers are fitted on ``data`` itself. NaN values introduced in
    step 1 are not imputed by this function.

    Args:
        data: pandas DataFrame holding the columns named above. The caller's
            frame is left unmodified; a transformed copy is returned.

    Returns:
        A tuple ``(data, encoder, scaler)``: the transformed DataFrame, the
        OneHotEncoder fitted on all three categorical columns, and the fitted
        StandardScaler.
    """
    numeric_cols = ['age', 'educational-num', 'hours-per-week']
    columns_to_encode = ['race', 'marital-status', 'relationship']

    le = LabelEncoder()
    scaler = StandardScaler()
    encoder = OneHotEncoder(sparse_output=False)

    # Non in-place replace returns a new frame, so every later assignment
    # works on our own copy instead of half-mutating the caller's DataFrame.
    data = data.replace({'?': np.nan, 99999: np.nan})

    # 1. Scale numerical features
    data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

    # 2. Label encode gender
    data['gender'] = le.fit_transform(data['gender'])

    # 3. One-hot encode the categorical columns with a single fit, so the
    #    returned encoder covers all of them (re-fitting per column would
    #    leave it fitted on the last column only). Output columns keep the
    #    order of columns_to_encode.
    encoded = encoder.fit_transform(data[columns_to_encode])
    encoded_cols = encoder.get_feature_names_out(columns_to_encode)
    encoded_df = pd.DataFrame(encoded, columns=encoded_cols, index=data.index)
    # Combine the encoded data with the remaining original columns
    data = pd.concat([data.drop(columns=columns_to_encode), encoded_df], axis=1)

    # 4. Binarize native country: 1 for 'United-States', 0 for anything else
    #    (NaN compares unequal, so missing values become 0).
    data['native-country'] = data['native-country'].eq('United-States').astype(int)

    print(data.head(10))

    return data, encoder, scaler
47
+
48
  # Create the Gradio interface
49
  interface = gr.Interface(
50
  fn=predict,