Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import joblib
|
2 |
+
import gradio as gr
|
3 |
+
import pandas as pd
|
4 |
+
|
5 |
+
# Load the preprocessor and the best model once, at import time.
# joblib.load unpickles its input, so these files must come from a trusted source.
# NOTE(review): both paths are absolute from the filesystem root - confirm the
# artifacts are actually deployed there.
preprocessor, best_model = (
    joblib.load(artifact_path)
    for artifact_path in (
        '/preprocessor.pkl',
        '/best_model_gradient_boosting.pkl',
    )
)
|
8 |
+
|
9 |
+
# Define the prediction function
def predict_income(age, workclass, fnlwgt, education, education_num, marital_status, occupation, relationship, race, sex, capital_gain, capital_loss, hours_per_week, native_country):
    """Predict the income class for one individual.

    Builds a single-row DataFrame from the arguments, transforms it with the
    module-level ``preprocessor`` and scores it with the module-level
    ``best_model``.

    Args:
        age, fnlwgt, education_num, capital_gain, capital_loss,
            hours_per_week: Numeric attributes; each is cast to ``int64``.
        workclass, education, marital_status, occupation, relationship,
            race, sex, native_country: Categorical attributes, kept as
            ``object`` columns and passed to the preprocessor unchanged.

    Returns:
        A ``(income_class, probability)`` tuple. ``income_class`` is
        ``'>50K'`` when the model predicts ``1`` and ``'<=50K'`` otherwise;
        ``probability`` is column 1 of ``predict_proba`` for the row.

    Raises:
        ValueError: If any numeric attribute is ``None`` (for example an
            input field that was left empty).
    """
    # An empty numeric field arrives as None and would otherwise fail inside
    # astype('int64') with an opaque TypeError; report the offending columns.
    numeric_values = {
        'age': age,
        'fnlwgt': fnlwgt,
        'education-num': education_num,
        'capital-gain': capital_gain,
        'capital-loss': capital_loss,
        'hours-per-week': hours_per_week,
    }
    missing = [column for column, value in numeric_values.items() if value is None]
    if missing:
        raise ValueError(
            "Missing value for numeric input(s): " + ", ".join(missing)
        )

    # Create a DataFrame for the input data
    input_data = pd.DataFrame({
        'age': [age],
        'workclass': [workclass],
        'fnlwgt': [fnlwgt],
        'education': [education],
        'education-num': [education_num],
        'marital-status': [marital_status],
        'occupation': [occupation],
        'relationship': [relationship],
        'race': [race],
        'sex': [sex],
        'capital-gain': [capital_gain],
        'capital-loss': [capital_loss],
        'hours-per-week': [hours_per_week],
        'native-country': [native_country],
    })

    # Ensure input data types match the training data
    # NOTE(review): dtypes are presumed to mirror the training frame - confirm.
    input_data = input_data.astype({
        'age': 'int64',
        'workclass': 'object',
        'fnlwgt': 'int64',
        'education': 'object',
        'education-num': 'int64',
        'marital-status': 'object',
        'occupation': 'object',
        'relationship': 'object',
        'race': 'object',
        'sex': 'object',
        'capital-gain': 'int64',
        'capital-loss': 'int64',
        'hours-per-week': 'int64',
        'native-country': 'object',
    })

    # Preprocess the input data
    input_data_preprocessed = preprocessor.transform(input_data)

    # Make prediction
    prediction = best_model.predict(input_data_preprocessed)
    prediction_proba = best_model.predict_proba(input_data_preprocessed)[:, 1]

    # Map prediction to class
    # NOTE(review): assumes the model's labels are 0/1 with 1 meaning '>50K'
    # and that predict_proba column 1 is that class - confirm against training.
    income_class = '>50K' if prediction[0] == 1 else '<=50K'
    probability = prediction_proba[0]

    return income_class, probability
|
59 |
+
|
60 |
+
# Create the Gradio interface.
# The order of input_fields must match the positional parameters of
# predict_income, since Gradio passes component values positionally.
input_fields = [
    gr.Number(label="Age"),
    gr.Dropdown(
        label="Workclass",
        choices=[
            'Private', 'Self-emp-not-inc', 'Self-emp-inc', 'Federal-gov',
            'Local-gov', 'State-gov', 'Without-pay', 'Never-worked',
        ],
    ),
    gr.Number(label="Fnlwgt"),
    gr.Dropdown(
        label="Education",
        choices=[
            'Bachelors', 'Some-college', '11th', 'HS-grad', 'Prof-school',
            'Assoc-acdm', 'Assoc-voc', '9th', '7th-8th', '12th', 'Masters',
            '1st-4th', '10th', 'Doctorate', '5th-6th', 'Preschool',
        ],
    ),
    gr.Number(label="Education-num"),
    gr.Dropdown(
        label="Marital-status",
        choices=[
            'Married-civ-spouse', 'Divorced', 'Never-married', 'Separated',
            'Widowed', 'Married-spouse-absent', 'Married-AF-spouse',
        ],
    ),
    gr.Dropdown(
        label="Occupation",
        choices=[
            'Tech-support', 'Craft-repair', 'Other-service', 'Sales',
            'Exec-managerial', 'Prof-specialty', 'Handlers-cleaners',
            'Machine-op-inspct', 'Adm-clerical', 'Farming-fishing',
            'Transport-moving', 'Priv-house-serv', 'Protective-serv',
            'Armed-Forces',
        ],
    ),
    gr.Dropdown(
        label="Relationship",
        choices=[
            'Wife', 'Own-child', 'Husband', 'Not-in-family',
            'Other-relative', 'Unmarried',
        ],
    ),
    gr.Dropdown(
        label="Race",
        choices=[
            'White', 'Asian-Pac-Islander', 'Amer-Indian-Eskimo', 'Other',
            'Black',
        ],
    ),
    gr.Dropdown(label="Sex", choices=['Female', 'Male']),
    gr.Number(label="Capital-gain"),
    gr.Number(label="Capital-loss"),
    gr.Number(label="Hours-per-week"),
    gr.Dropdown(
        label="Native-country",
        choices=[
            'United-States', 'Cambodia', 'England', 'Puerto-Rico', 'Canada',
            'Germany', 'Outlying-US(Guam-USVI-etc)', 'India', 'Japan',
            'Greece', 'South', 'China', 'Cuba', 'Iran', 'Honduras',
            'Philippines', 'Italy', 'Poland', 'Jamaica', 'Vietnam', 'Mexico',
            'Portugal', 'Ireland', 'France', 'Dominican-Republic', 'Laos',
            'Ecuador', 'Taiwan', 'Haiti', 'Columbia', 'Hungary', 'Guatemala',
            'Nicaragua', 'Scotland', 'Thailand', 'Yugoslavia', 'El-Salvador',
            'Trinadad&Tobago', 'Peru', 'Hong', 'Holand-Netherlands',
        ],
    ),
]

# One text box per value returned by predict_income: the class label and the
# probability.
output_fields = [
    gr.Textbox(label="Predicted Income Class"),
    gr.Textbox(label="Probability of >50K Income"),
]

# Build the interface, then start the server as a separate step.
demo = gr.Interface(
    fn=predict_income,
    inputs=input_fields,
    outputs=output_fields,
    title="Income Prediction App",
    description="Predict whether an individual makes over $50K a year based on various attributes.",
)
demo.launch(share=True, debug=True)
|