Spaces:
Sleeping
Sleeping
narinsak unawong
committed on
Upload 3 files
Browse files
- app_KPI_706.py +159 -0
- model_kpi_706.pkl +3 -0
- requirements.txt +7 -0
app_KPI_706.py
ADDED
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
import streamlit as st
|
3 |
+
import pandas as pd
|
4 |
+
import numpy as np
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
import seaborn as sns
|
7 |
+
import pickle
|
8 |
+
|
9 |
+
# Unpickle the trained model together with its five categorical encoders.
# The pickle must hold a 6-item sequence in exactly the order unpacked below.
with open('model_kpi_706.pkl', 'rb') as model_file:
    (
        model,
        department_encoder,
        region_encoder,
        education_encoder,
        gender_encoder,
        recruitment_channel_encoder,
    ) = pickle.load(model_file)

# Reference dataset for the data-visualisation tab.  The identifier column
# is removed right away so it is not offered as a feature to plot.
df = pd.read_csv('Uncleaned_employees_final_dataset.csv').drop('employee_id', axis=1)
|
17 |
+
|
18 |
+
# Streamlit App
st.title('Employee KPIs App')

# The selected tab is stored in session state as an integer index into
# `tabs`, so the choice survives script reruns.
if 'tab_selected' not in st.session_state:
    st.session_state.tab_selected = 0

# The "tabs" are emulated with a radio button whose default option is the
# index remembered from the previous run.
tabs = ['Predict KPIs', 'Visualize Data', 'Predict from CSV']
selected_tab = st.radio('Select Tab:', tabs, index=st.session_state.tab_selected)

# Persist the index of the chosen label.  This used to be guarded by a
# comparison between the label (str) and the stored index (int); that test
# could never be false, so the assignment is simply made unconditional.
st.session_state.tab_selected = tabs.index(selected_tab)
|
32 |
+
|
33 |
+
# Name of the KPI flag column: present in the reference dataset and added to
# uploaded data as the prediction output.
TARGET_COLUMN = 'KPIs_met_more_than_80'

# Columns passed to the model, in the order used by the single-prediction form.
FEATURE_COLUMNS = [
    'department',
    'region',
    'education',
    'gender',
    'recruitment_channel',
    'no_of_trainings',
    'age',
    'previous_year_rating',
    'length_of_service',
    'awards_won',
    'avg_training_score',
]


def _encode_categoricals(frame):
    """Return a copy of *frame* with the five categorical columns encoded.

    Each column is run through its matching encoder's ``transform``; the
    caller's frame is left untouched.  Whatever ``transform`` raises for a
    value it cannot handle propagates to the caller.
    """
    encoders = {
        'department': department_encoder,
        'region': region_encoder,
        'education': education_encoder,
        'gender': gender_encoder,
        'recruitment_channel': recruitment_channel_encoder,
    }
    encoded = frame.copy()
    for column, encoder in encoders.items():
        encoded[column] = encoder.transform(encoded[column])
    return encoded


def _plot_kpi_counts(data, feature, title):
    """Draw a count plot of *feature* split by the KPI flag and render it."""
    fig, ax = plt.subplots(figsize=(14, 8))
    sns.countplot(x=feature, hue=TARGET_COLUMN, data=data, palette='viridis', ax=ax)
    ax.set_title(title)
    ax.set_xlabel(feature)
    ax.set_ylabel('Number of Employees')
    st.pyplot(fig)
    # pyplot keeps every figure registered until it is closed; without this
    # each rerun of the script would leave another figure in memory.
    plt.close(fig)


def _predict_from_csv(uploaded_file):
    """Score the complete rows of an uploaded CSV and show table and chart.

    Rows containing any missing value are dropped before prediction.  Only
    the columns in ``FEATURE_COLUMNS`` are passed to the model, so an
    ``employee_id`` column, an existing KPI column or any other extra column
    in the upload is ignored for scoring (but still shown in the result).
    Problems with the upload are reported in the UI instead of raising.
    """
    csv_df_org = pd.read_csv(uploaded_file).dropna()

    missing = [col for col in FEATURE_COLUMNS if col not in csv_df_org.columns]
    if missing:
        st.error(f"Uploaded CSV is missing required column(s): {', '.join(missing)}")
        return
    if csv_df_org.empty:
        st.warning('No complete rows left after dropping rows with missing values.')
        return

    try:
        csv_df = _encode_categoricals(csv_df_org[FEATURE_COLUMNS])
    except ValueError as exc:
        # NOTE(review): assumes the encoders signal unknown categories with
        # ValueError (as scikit-learn's LabelEncoder does) - confirm.
        st.error(f'Could not encode categorical values: {exc}')
        return

    # Add predictions to the original (un-encoded) rows for display.
    csv_df_org[TARGET_COLUMN] = model.predict(csv_df)

    st.subheader('Predicted Results:')
    st.write(csv_df_org)

    # Visualize predictions based on a selected feature
    st.subheader('Visualize Predictions')
    feature_for_visualization = st.selectbox(
        'Select Feature for Visualization:', csv_df_org.columns
    )
    _plot_kpi_counts(
        csv_df_org,
        feature_for_visualization,
        f'Number of Employees based on KPIs - {feature_for_visualization}',
    )


# Tab 1: Predict KPIs
if st.session_state.tab_selected == 0:
    st.header('Predict KPIs')

    # User Input Form
    department = st.selectbox('Department', department_encoder.classes_)
    region = st.selectbox('Region', region_encoder.classes_)
    education = st.selectbox('Education', education_encoder.classes_)
    gender = st.radio('Gender', gender_encoder.classes_)
    recruitment_channel = st.selectbox('Recruitment Channel', recruitment_channel_encoder.classes_)
    no_of_trainings = st.slider('Number of Trainings', 1, 10, 1)
    age = st.slider('Age', 18, 60, 30)
    previous_year_rating = st.slider('Previous Year Rating', 1.0, 5.0, 3.0)
    length_of_service = st.slider('Length of Service', 1, 20, 5)
    awards_won = st.checkbox('Awards Won')
    avg_training_score = st.slider('Average Training Score', 40, 100, 70)

    # One-row frame holding the form values; key order matches FEATURE_COLUMNS.
    user_input = pd.DataFrame({
        'department': [department],
        'region': [region],
        'education': [education],
        'gender': [gender],
        'recruitment_channel': [recruitment_channel],
        'no_of_trainings': [no_of_trainings],
        'age': [age],
        'previous_year_rating': [previous_year_rating],
        'length_of_service': [length_of_service],
        'awards_won': [1 if awards_won else 0],
        'avg_training_score': [avg_training_score],
    })

    # Encode the categorical columns, then predict.
    prediction = model.predict(_encode_categoricals(user_input))

    # Display Result
    st.subheader('Prediction Result:')
    st.write('KPIs_met_more_than_80:', prediction[0])

# Tab 2: Visualize Data
elif st.session_state.tab_selected == 1:
    st.header('Visualize Data')

    # Column to filter on and to plot along the x axis.
    condition_feature = st.selectbox('Select Condition Feature:', df.columns)

    # Offer every distinct value of that column plus a 'Select All' shortcut.
    all_values = df[condition_feature].unique().tolist()
    condition_values = st.multiselect('Select Condition Values:', ['Select All'] + all_values)

    # 'Select All' overrides any individually picked values.
    if 'Select All' in condition_values:
        condition_values = all_values

    # Nothing is drawn until at least one value has been chosen.
    if condition_values:
        filtered_df = df[df[condition_feature].isin(condition_values)]
        _plot_kpi_counts(filtered_df, condition_feature, 'Number of Employees based on KPIs')

# Tab 3: Predict from CSV
elif st.session_state.tab_selected == 2:
    st.header('Predict from CSV')

    # Upload CSV file
    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])

    if uploaded_file is not None:
        _predict_from_csv(uploaded_file)
|
158 |
+
|
159 |
+
|
model_kpi_706.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e85a1d1c74943028f6989a7358e490931be09b1b88acd16da7805719c0f339d9
|
3 |
+
size 136186
|
requirements.txt
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
matplotlib
|
3 |
+
seaborn
|
4 |
+
XGBoost
|
5 |
+
scikit-learn
|
6 |
+
pandas
|
7 |
+
|