narinsak unawong committed on
Commit
a9ad1db
·
verified ·
1 Parent(s): 4e9d904

Upload 3 files

Browse files
Files changed (3) hide show
  1. app_KPI_706.py +159 -0
  2. model_kpi_706.pkl +3 -0
  3. requirements.txt +7 -0
app_KPI_706.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app for employee KPI prediction.

The app offers three views, chosen with a radio button:

* 'Predict KPIs'     - predict for one employee entered through a form.
* 'Visualize Data'   - count plot of the bundled dataset, filtered by a feature.
* 'Predict from CSV' - predict for every row of an uploaded CSV and plot it.
"""

import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle

# Name of the label column, used both for plotting and for storing predictions.
TARGET_COLUMN = 'KPIs_met_more_than_80'

# Columns handed to ``model.predict``, in the order used by the input form.
FEATURE_COLUMNS = [
    'department',
    'region',
    'education',
    'gender',
    'recruitment_channel',
    'no_of_trainings',
    'age',
    'previous_year_rating',
    'length_of_service',
    'awards_won',
    'avg_training_score',
]

TABS = ['Predict KPIs', 'Visualize Data', 'Predict from CSV']

# Load model and encoders.
# NOTE(security): pickle.load can execute arbitrary code while unpickling.
# This is acceptable only because the file is shipped with the app; never
# point this at a user-supplied file.
with open('model_kpi_706.pkl', 'rb') as file:
    (
        model,
        department_encoder,
        region_encoder,
        education_encoder,
        gender_encoder,
        recruitment_channel_encoder,
    ) = pickle.load(file)

# Encoder to apply to each categorical column before prediction.
# Presumably scikit-learn LabelEncoders (they expose ``classes_`` and
# ``transform``) - confirm against the training notebook.
CATEGORICAL_ENCODERS = {
    'department': department_encoder,
    'region': region_encoder,
    'education': education_encoder,
    'gender': gender_encoder,
    'recruitment_channel': recruitment_channel_encoder,
}

# Dataset used by the 'Visualize Data' tab; the identifier column is not plotted.
df = pd.read_csv('Uncleaned_employees_final_dataset.csv')
df = df.drop('employee_id', axis=1)


def _encode_categoricals(frame):
    """Return a copy of *frame* with every categorical column encoded.

    Each column named in ``CATEGORICAL_ENCODERS`` is replaced by the output of
    its encoder's ``transform``; other columns and the column order are kept.
    """
    encoded = frame.copy()
    for column, encoder in CATEGORICAL_ENCODERS.items():
        encoded[column] = encoder.transform(encoded[column])
    return encoded


def _plot_kpi_counts(data, feature, title):
    """Render a count plot of *feature* in *data*, split by the KPI column."""
    fig, ax = plt.subplots(figsize=(14, 8))
    # Draw on the explicit axes instead of relying on pyplot's global
    # "current figure", which is shared between Streamlit sessions.
    sns.countplot(x=feature, hue=TARGET_COLUMN, data=data, palette='viridis', ax=ax)
    ax.set_title(title)
    ax.set_xlabel(feature)
    ax.set_ylabel('Number of Employees')
    st.pyplot(fig)
    # The script reruns on every interaction; without closing, each rerun
    # leaves another figure registered in pyplot.
    plt.close(fig)


def _render_predict_tab():
    """Tab 1: collect one employee's attributes and show the prediction."""
    st.header('Predict KPIs')

    # User input form
    department = st.selectbox('Department', department_encoder.classes_)
    region = st.selectbox('Region', region_encoder.classes_)
    education = st.selectbox('Education', education_encoder.classes_)
    gender = st.radio('Gender', gender_encoder.classes_)
    recruitment_channel = st.selectbox('Recruitment Channel', recruitment_channel_encoder.classes_)
    no_of_trainings = st.slider('Number of Trainings', 1, 10, 1)
    age = st.slider('Age', 18, 60, 30)
    previous_year_rating = st.slider('Previous Year Rating', 1.0, 5.0, 3.0)
    length_of_service = st.slider('Length of Service', 1, 20, 5)
    awards_won = st.checkbox('Awards Won')
    avg_training_score = st.slider('Average Training Score', 40, 100, 70)

    # Single-row frame; key order matches FEATURE_COLUMNS.
    user_input = pd.DataFrame({
        'department': [department],
        'region': [region],
        'education': [education],
        'gender': [gender],
        'recruitment_channel': [recruitment_channel],
        'no_of_trainings': [no_of_trainings],
        'age': [age],
        'previous_year_rating': [previous_year_rating],
        'length_of_service': [length_of_service],
        'awards_won': [1 if awards_won else 0],
        'avg_training_score': [avg_training_score],
    })

    prediction = model.predict(_encode_categoricals(user_input))

    st.subheader('Prediction Result:')
    st.write('KPIs_met_more_than_80:', prediction[0])


def _render_visualize_tab():
    """Tab 2: plot the bundled dataset filtered by values of one feature."""
    st.header('Visualize Data')

    condition_feature = st.selectbox('Select Condition Feature:', df.columns)

    all_values = df[condition_feature].unique().tolist()
    condition_values = st.multiselect('Select Condition Values:', ['Select All'] + all_values)

    # 'Select All' is a pseudo-option that expands to every distinct value.
    if 'Select All' in condition_values:
        condition_values = all_values

    if condition_values:
        filtered_df = df[df[condition_feature].isin(condition_values)]
        _plot_kpi_counts(filtered_df, condition_feature, 'Number of Employees based on KPIs')


def _render_csv_tab():
    """Tab 3: predict for every usable row of an uploaded CSV and plot it.

    Only the columns in ``FEATURE_COLUMNS`` are passed to the model, so extra
    columns in the upload (an id, an existing label column, ...) are shown in
    the result table but do not affect the prediction. Problems with the
    upload are reported in the page instead of raising.
    """
    st.header('Predict from CSV')

    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded_file is None:
        return

    try:
        csv_df_org = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f'Could not read the uploaded CSV: {exc}')
        return

    missing = [column for column in FEATURE_COLUMNS if column not in csv_df_org.columns]
    if missing:
        st.error(f"Uploaded CSV is missing required columns: {', '.join(missing)}")
        return

    # Only rows with a missing *feature* are unusable; gaps in other columns
    # do not matter to the model.
    csv_df_org = csv_df_org.dropna(subset=FEATURE_COLUMNS)
    if csv_df_org.empty:
        st.warning('No rows left to predict after removing rows with missing values.')
        return

    try:
        features = _encode_categoricals(csv_df_org[FEATURE_COLUMNS])
    except ValueError as exc:
        # Assumes the encoders raise ValueError for values not seen when they
        # were fitted (scikit-learn's LabelEncoder does) - confirm.
        st.error(f'Could not encode the categorical columns: {exc}')
        return

    # Store the predictions next to the original (un-encoded) values.
    csv_df_org[TARGET_COLUMN] = model.predict(features)

    st.subheader('Predicted Results:')
    st.write(csv_df_org)

    st.subheader('Visualize Predictions')

    # Plotting the prediction column against itself carries no information.
    plottable = [column for column in csv_df_org.columns if column != TARGET_COLUMN]
    feature_for_visualization = st.selectbox('Select Feature for Visualization:', plottable)

    _plot_kpi_counts(
        csv_df_org,
        feature_for_visualization,
        f'Number of Employees based on KPIs - {feature_for_visualization}',
    )


# Streamlit App
st.title('Employee KPIs App')

# Remember the selected tab (as an index into TABS) across reruns.
if 'tab_selected' not in st.session_state:
    st.session_state.tab_selected = 0

selected_tab = st.radio('Select Tab:', TABS, index=st.session_state.tab_selected)
st.session_state.tab_selected = TABS.index(selected_tab)

# One renderer per entry of TABS, in the same order.
_TAB_RENDERERS = (_render_predict_tab, _render_visualize_tab, _render_csv_tab)
_TAB_RENDERERS[st.session_state.tab_selected]()
model_kpi_706.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e85a1d1c74943028f6989a7358e490931be09b1b88acd16da7805719c0f339d9
3
+ size 136186
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+
2
+ matplotlib
3
+ seaborn
4
+ XGBoost
5
+ scikit-learn
6
+ pandas
7
+