Spaces:

matsammut
/

ICS5110-Applied_ML

Sleeping

App Files Files Community

matsammut committed on Jan 13

Commit

3c78fe7

verified ·

1 Parent(s): 1d2307b

Update app.py

Browse files

Files changed (1) hide show

app.py +34 -0

app.py CHANGED Viewed

@@ -1,5 +1,9 @@
 import gradio as gr
 import joblib
 # Load your saved model
 model = joblib.load("ann_model.joblib")
@@ -11,6 +15,36 @@ def predict(age, workclass, education, marital_status, occupation, relationship,
     prediction = 1
     return "Income >50K" if prediction == 1 else "Income <=50K"
 # Create the Gradio interface
 interface = gr.Interface(
     fn=predict,

 import gradio as gr
 import joblib
+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
+from sklearn.impute import KNNImputer
 # Load your saved model
 model = joblib.load("ann_model.joblib")
     prediction = 1
     return "Income >50K" if prediction == 1 else "Income <=50K"
+def cleaning_features(data):  # Preprocess the feature DataFrame `data`; returns (data, encoder, scaler)
+    le = LabelEncoder()
+    scaler = StandardScaler()
+    encoder = OneHotEncoder(sparse_output=False)  # dense array output so it can be wrapped in a DataFrame below
+    numeric_cols = ['age', 'educational-num', 'hours-per-week']
+    columns_to_encode = ['race','marital-status','relationship']
+    data.replace({'?': np.nan, 99999: np.nan}, inplace=True)  # mark '?' and 99999 as missing; mutates the caller's DataFrame
+    # 1. Scale numerical features (the scaler is fitted on the data passed in)
+    data[numeric_cols] = scaler.fit_transform(data[numeric_cols])
+    # 2. Label encode gender
+    data['gender'] = le.fit_transform(data['gender'])
+    # 3. One-hot encode race, marital-status and relationship, one column at a time
+    for N in columns_to_encode:
+        race_encoded = encoder.fit_transform(data[[N]])  # re-fits the shared encoder on column N
+        race_encoded_cols = encoder.get_feature_names_out([N])
+        race_encoded_df = pd.DataFrame(race_encoded, columns=race_encoded_cols, index=data.index)
+        # Replace the original column N with its one-hot columns
+        data = pd.concat([data.drop(N, axis=1), race_encoded_df], axis=1)
+    # Binarize native country: 1 for 'United-States', 0 for anything else
+    data['native-country'] = data['native-country'].apply(lambda x: x == 'United-States')
+    data['native-country'] = data['native-country'].astype(int)
+    print(data.head(10))  # debug output: first 10 processed rows
+    return data, encoder, scaler  # NOTE(review): encoder was last fitted on 'relationship' only; confirm callers expect that
 # Create the Gradio interface
 interface = gr.Interface(
     fn=predict,