Update app.py
Browse files
app.py
CHANGED
@@ -26,41 +26,149 @@ def main():
|
|
26 |
st.write("This app helps HR practitioners predict employee attrition using a trained CatBoost model.")
|
27 |
st.write("Please provide the following information to make a prediction:")
|
28 |
|
29 |
-
# Define layout with two columns
|
30 |
-
col1, col2 = st.columns(2)
|
31 |
|
32 |
# Column 1
|
33 |
with col1:
|
34 |
-
age = st.
|
35 |
-
monthly_income = st.
|
36 |
-
num_companies_worked = st.
|
37 |
-
percent_salary_hike = st.
|
38 |
-
training_times_last_year = st.
|
39 |
-
years_since_last_promotion = st.slider("Years Since Last Promotion", min_value=0, max_value=15)
|
40 |
-
years_with_curr_manager = st.slider("Years With Current Manager", min_value=0, max_value=15)
|
41 |
|
42 |
# Column 2
|
43 |
with col2:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
over_time = st.checkbox("Over Time")
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
relationship_satisfaction = st.select_slider("Relationship Satisfaction", options=[1, 2, 3, 4])
|
49 |
|
50 |
# Create a DataFrame to hold the user input data
|
51 |
input_data = pd.DataFrame({
|
52 |
'Age': [age],
|
|
|
|
|
|
|
|
|
53 |
'MonthlyIncome': [monthly_income],
|
54 |
'NumCompaniesWorked': [num_companies_worked],
|
|
|
55 |
'PercentSalaryHike': [percent_salary_hike],
|
|
|
56 |
'TrainingTimesLastYear': [training_times_last_year],
|
57 |
-
'YearsSinceLastPromotion': [years_since_last_promotion],
|
58 |
-
'YearsWithCurrManager': [years_with_curr_manager],
|
59 |
-
'OverTime': [over_time],
|
60 |
'WorkLifeBalance': [work_life_balance],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
'EnvironmentSatisfaction': [environment_satisfaction],
|
|
|
62 |
'JobSatisfaction': [job_satisfaction],
|
63 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
64 |
})
|
65 |
|
66 |
# Make predictions
|
@@ -87,4 +195,4 @@ def main():
|
|
87 |
st.write(f"Probability of Attrition: {probability[0]*100:.2f}%")
|
88 |
|
89 |
if __name__ == "__main__":
|
90 |
-
main()
|
|
|
26 |
st.write("This app helps HR practitioners predict employee attrition using a trained CatBoost model.")
|
27 |
st.write("Please provide the following information to make a prediction:")
|
28 |
|
29 |
+
# Define layout with three columns
|
30 |
+
col1, col2, col3 = st.columns(3)
|
31 |
|
32 |
# Column 1
|
33 |
with col1:
|
34 |
+
age = st.number_input("Age", min_value=18, max_value=70)
|
35 |
+
monthly_income = st.number_input("Monthly Income")
|
36 |
+
num_companies_worked = st.number_input("Number of Companies Worked")
|
37 |
+
percent_salary_hike = st.number_input("Percent Salary Hike", min_value=10, max_value=25)
|
38 |
+
training_times_last_year = st.number_input("Training Times Last Year", min_value=0, max_value=6)
|
|
|
|
|
39 |
|
40 |
# Column 2
|
41 |
with col2:
|
42 |
+
department = st.selectbox("Department", ['Sales', 'Research & Development', 'Human Resources'])
|
43 |
+
environment_satisfaction = st.selectbox("Environment Satisfaction", [1, 2, 3, 4])
|
44 |
+
job_role = st.selectbox("Job Role", ['Sales Executive', 'Research Scientist', 'Laboratory Technician',
|
45 |
+
'Manufacturing Director', 'Healthcare Representative', 'Manager',
|
46 |
+
'Sales Representative', 'Research Director', 'Human Resources'])
|
47 |
+
job_satisfaction = st.selectbox("Job Satisfaction", [1, 2, 3, 4])
|
48 |
+
work_life_balance = st.selectbox("Work Life Balance", [1, 2, 3, 4])
|
49 |
+
|
50 |
+
# Column 3
|
51 |
+
with col3:
|
52 |
over_time = st.checkbox("Over Time")
|
53 |
+
relationship_satisfaction = st.selectbox("Relationship Satisfaction", [1, 2, 3, 4])
|
54 |
+
years_since_last_promotion = st.number_input("Years Since Last Promotion")
|
55 |
+
years_with_curr_manager = st.number_input("Years With Current Manager")
|
|
|
56 |
|
57 |
# Create a DataFrame to hold the user input data
|
58 |
input_data = pd.DataFrame({
|
59 |
'Age': [age],
|
60 |
+
'Department': [department],
|
61 |
+
'EnvironmentSatisfaction': [environment_satisfaction],
|
62 |
+
'JobRole': [job_role],
|
63 |
+
'JobSatisfaction': [job_satisfaction],
|
64 |
'MonthlyIncome': [monthly_income],
|
65 |
'NumCompaniesWorked': [num_companies_worked],
|
66 |
+
'OverTime': [over_time],
|
67 |
'PercentSalaryHike': [percent_salary_hike],
|
68 |
+
'RelationshipSatisfaction': [relationship_satisfaction],
|
69 |
'TrainingTimesLastYear': [training_times_last_year],
|
|
|
|
|
|
|
70 |
'WorkLifeBalance': [work_life_balance],
|
71 |
+
'YearsSinceLastPromotion': [years_since_last_promotion],
|
72 |
+
'YearsWithCurrManager': [years_with_curr_manager]
|
73 |
+
})
|
74 |
+
|
75 |
+
# Make predictions
|
76 |
+
prediction = model.predict(input_data)
|
77 |
+
probability = model.predict_proba(input_data)[:, 1]
|
78 |
+
|
79 |
+
# Display prediction
|
80 |
+
if prediction[0] == 0:
|
81 |
+
st.success("Employee is predicted to stay (Attrition = No)")
|
82 |
+
else:
|
83 |
+
st.error("Employee is predicted to leave (Attrition = Yes)")
|
84 |
+
|
85 |
+
# Offer recommendations for retaining the employee
|
86 |
+
st.subheader("Suggestions for retaining the employee:")
|
87 |
+
st.markdown("- Invest in orientation programs and career development for entry-level staff, which could contribute to higher retention.")
|
88 |
+
st.markdown("- Implement mentorship programs and career development initiatives aimed at engaging and retaining younger employees.")
|
89 |
+
st.markdown("- Offer robust training and development programs and regular promotions to foster career growth. This investment in skills and career advancement can contribute to higher job satisfaction and retention.")
|
90 |
+
st.markdown("- Recognize the diverse needs of employees based on marital status and consider tailoring benefits or support programs accordingly.")
|
91 |
+
st.markdown("- Consider offering benefits that cater to the unique needs of married, single, and divorced employees.")
|
92 |
+
st.markdown("- Introduce or enhance policies that support work-life balance for employees with families.")
|
93 |
+
st.markdown("- Recognize the unique challenges and opportunities within each department and tailor retention strategies accordingly.")
|
94 |
+
|
95 |
+
# Display probability
|
96 |
+
st.write(f"Probability of Attrition: {probability[0]:.2f}")
|
97 |
+
|
98 |
+
if __name__ == "__main__":
|
99 |
+
main()
|
100 |
+
import streamlit as st
|
101 |
+
import pickle
|
102 |
+
import pandas as pd
|
103 |
+
from catboost import CatBoostClassifier
|
104 |
+
|
105 |
+
# Load the trained model and unique values from the pickle file
|
106 |
+
with open('model_and_key_components.pkl', 'rb') as file:
|
107 |
+
saved_components = pickle.load(file)
|
108 |
+
|
109 |
+
model = saved_components['model']
|
110 |
+
unique_values = saved_components['unique_values']
|
111 |
+
|
112 |
+
# Define the Streamlit app
|
113 |
+
def main():
|
114 |
+
st.title("Employee Attrition Prediction App")
|
115 |
+
st.sidebar.title("Model Settings")
|
116 |
+
|
117 |
+
# Sidebar inputs
|
118 |
+
with st.sidebar.expander("View Unique Values"):
|
119 |
+
st.write("Unique values for each feature:")
|
120 |
+
for column, values in unique_values.items():
|
121 |
+
st.write(f"- {column}: {values}")
|
122 |
+
|
123 |
+
# Main content
|
124 |
+
st.write("Welcome to the Employee Attrition Prediction App!")
|
125 |
+
st.write("This app helps HR practitioners predict employee attrition using a trained CatBoost model.")
|
126 |
+
st.write("Please provide the following information to make a prediction:")
|
127 |
+
|
128 |
+
# Define layout with three columns
|
129 |
+
col1, col2, col3 = st.columns(3)
|
130 |
+
|
131 |
+
# Column 1
|
132 |
+
with col1:
|
133 |
+
age = st.number_input("Age", min_value=18, max_value=70)
|
134 |
+
monthly_income = st.number_input("Monthly Income")
|
135 |
+
num_companies_worked = st.number_input("Number of Companies Worked")
|
136 |
+
percent_salary_hike = st.number_input("Percent Salary Hike", min_value=10, max_value=25)
|
137 |
+
training_times_last_year = st.number_input("Training Times Last Year", min_value=0, max_value=6)
|
138 |
+
|
139 |
+
# Column 2
|
140 |
+
with col2:
|
141 |
+
department = st.selectbox("Department", ['Sales', 'Research & Development', 'Human Resources'])
|
142 |
+
environment_satisfaction = st.selectbox("Environment Satisfaction", [1, 2, 3, 4])
|
143 |
+
job_role = st.selectbox("Job Role", ['Sales Executive', 'Research Scientist', 'Laboratory Technician',
|
144 |
+
'Manufacturing Director', 'Healthcare Representative', 'Manager',
|
145 |
+
'Sales Representative', 'Research Director', 'Human Resources'])
|
146 |
+
job_satisfaction = st.selectbox("Job Satisfaction", [1, 2, 3, 4])
|
147 |
+
work_life_balance = st.selectbox("Work Life Balance", [1, 2, 3, 4])
|
148 |
+
|
149 |
+
# Column 3
|
150 |
+
with col3:
|
151 |
+
over_time = st.checkbox("Over Time")
|
152 |
+
relationship_satisfaction = st.selectbox("Relationship Satisfaction", [1, 2, 3, 4])
|
153 |
+
years_since_last_promotion = st.number_input("Years Since Last Promotion")
|
154 |
+
years_with_curr_manager = st.number_input("Years With Current Manager")
|
155 |
+
|
156 |
+
# Create a DataFrame to hold the user input data
|
157 |
+
input_data = pd.DataFrame({
|
158 |
+
'Age': [age],
|
159 |
+
'Department': [department],
|
160 |
'EnvironmentSatisfaction': [environment_satisfaction],
|
161 |
+
'JobRole': [job_role],
|
162 |
'JobSatisfaction': [job_satisfaction],
|
163 |
+
'MonthlyIncome': [monthly_income],
|
164 |
+
'NumCompaniesWorked': [num_companies_worked],
|
165 |
+
'OverTime': [over_time],
|
166 |
+
'PercentSalaryHike': [percent_salary_hike],
|
167 |
+
'RelationshipSatisfaction': [relationship_satisfaction],
|
168 |
+
'TrainingTimesLastYear': [training_times_last_year],
|
169 |
+
'WorkLifeBalance': [work_life_balance],
|
170 |
+
'YearsSinceLastPromotion': [years_since_last_promotion],
|
171 |
+
'YearsWithCurrManager': [years_with_curr_manager]
|
172 |
})
|
173 |
|
174 |
# Make predictions
|
|
|
195 |
st.write(f"Probability of Attrition: {probability[0]*100:.2f}%")
|
196 |
|
197 |
if __name__ == "__main__":
|
198 |
+
main()
|