Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,9 @@
|
|
1 |
import gradio as gr
|
2 |
import joblib
|
|
|
|
|
|
|
|
|
3 |
|
4 |
# Load your saved model
|
5 |
model = joblib.load("ann_model.joblib")
|
@@ -11,6 +15,36 @@ def predict(age, workclass, education, marital_status, occupation, relationship,
|
|
11 |
prediction = 1
|
12 |
return "Income >50K" if prediction == 1 else "Income <=50K"
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
# Create the Gradio interface
|
15 |
interface = gr.Interface(
|
16 |
fn=predict,
|
|
|
1 |
import gradio as gr
|
2 |
import joblib
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
|
6 |
+
from sklearn.impute import KNNImputer
|
7 |
|
8 |
# Load your saved model
|
9 |
model = joblib.load("ann_model.joblib")
|
|
|
15 |
prediction = 1
|
16 |
return "Income >50K" if prediction == 1 else "Income <=50K"
|
17 |
|
18 |
+
def cleaning_features(data):
    """Turn raw census-style records into an all-numeric feature frame.

    The input frame must contain the columns ``age``, ``educational-num``,
    ``hours-per-week``, ``gender``, ``race``, ``marital-status``,
    ``relationship`` and ``native-country``.

    Steps, in order:
      1. Replace the placeholders ``'?'`` and ``99999`` with NaN.
      2. Standard-scale the three numeric columns.
      3. Label-encode ``gender``.
      4. One-hot encode ``race``, ``marital-status`` and ``relationship``,
         appending the indicator columns after the remaining columns.
      5. Convert ``native-country`` to 1 for ``'United-States'``, else 0.

    The caller's DataFrame is left untouched; all work happens on a copy.

    Args:
        data: pandas DataFrame holding the columns listed above.

    Returns:
        A tuple ``(data, encoder, scaler)``: the transformed copy of the
        frame, the ``OneHotEncoder`` and the ``StandardScaler`` used.
        The encoder is re-fit for every one-hot column, so on return it
        only reflects the last one (``relationship``).
    """
    numeric_cols = ['age', 'educational-num', 'hours-per-week']
    columns_to_encode = ['race', 'marital-status', 'relationship']

    le = LabelEncoder()
    scaler = StandardScaler()
    encoder = OneHotEncoder(sparse_output=False)

    # Work on a private copy: the original implementation replaced values
    # and rescaled columns directly on the caller's frame, leaving it
    # half-transformed while returning a different, concatenated frame.
    data = data.copy()
    data.replace({'?': np.nan, 99999: np.nan}, inplace=True)

    # NOTE(review): scaler and encoders are fit on whatever frame is passed
    # in. If this runs on inference inputs, the statistics/categories will
    # not match those seen at training time -- confirm intended usage.

    # 1. Scale numerical features
    data[numeric_cols] = scaler.fit_transform(data[numeric_cols])

    # 2. Label encode gender
    data['gender'] = le.fit_transform(data['gender'])

    # 3. One-hot encode each categorical column, one at a time
    for column in columns_to_encode:
        encoded = encoder.fit_transform(data[[column]])
        encoded_cols = encoder.get_feature_names_out([column])
        encoded_df = pd.DataFrame(encoded, columns=encoded_cols, index=data.index)
        # Swap the raw column for its indicator columns
        data = pd.concat([data.drop(column, axis=1), encoded_df], axis=1)

    # 4. Binarize native country (missing values compare unequal -> 0)
    data['native-country'] = data['native-country'].apply(lambda x: x == 'United-States')
    data['native-country'] = data['native-country'].astype(int)

    # Debug preview of the transformed frame
    print(data.head(10))

    return data, encoder, scaler
|
47 |
+
|
48 |
# Create the Gradio interface
|
49 |
interface = gr.Interface(
|
50 |
fn=predict,
|