Spaces:
Sleeping
Sleeping
File size: 6,121 Bytes
a9ad1db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
# Restore the trained model together with the encoders that were pickled
# alongside it (one encoder per categorical feature).
# NOTE(review): unpickling executes arbitrary code -- only deploy a model
# file that comes from a trusted source.
with open('model_kpi_706.pkl', 'rb') as model_file:
    (
        model,
        department_encoder,
        region_encoder,
        education_encoder,
        gender_encoder,
        recruitment_channel_encoder,
    ) = pickle.load(model_file)

# Reference dataset for the app; the identifier column is dropped right away
# so it is never offered as a feature.
df = pd.read_csv('Uncleaned_employees_final_dataset.csv').drop(columns='employee_id')
# Streamlit App
st.title('Employee KPIs App')

# Remember the active tab (stored as an index into `tabs`) across reruns.
if 'tab_selected' not in st.session_state:
    st.session_state.tab_selected = 0

# A radio group stands in for tabs: single prediction, visualization and
# batch prediction from an uploaded CSV.
tabs = ['Predict KPIs', 'Visualize Data', 'Predict from CSV']
selected_tab = st.radio('Select Tab:', tabs, index=st.session_state.tab_selected)

# st.radio returns the chosen label while the session state holds an index,
# so translate the label to its index before comparing; comparing the label
# to the stored index directly would always be unequal.
selected_index = tabs.index(selected_tab)
if selected_index != st.session_state.tab_selected:
    st.session_state.tab_selected = selected_index
# Name of the target column: used as the plot hue and as the name of the
# column that receives batch predictions.
TARGET_COLUMN = 'KPIs_met_more_than_80'

# Feature columns in the order the single-prediction form hands them to the
# model; batch prediction selects the same columns in the same order.
FEATURE_COLUMNS = [
    'department',
    'region',
    'education',
    'gender',
    'recruitment_channel',
    'no_of_trainings',
    'age',
    'previous_year_rating',
    'length_of_service',
    'awards_won',
    'avg_training_score',
]

# Categorical feature -> encoder loaded from the model pickle.
CATEGORICAL_ENCODERS = {
    'department': department_encoder,
    'region': region_encoder,
    'education': education_encoder,
    'gender': gender_encoder,
    'recruitment_channel': recruitment_channel_encoder,
}


def _encode_categoricals(frame):
    """Return a copy of *frame* with every categorical column encoded.

    Each column named in CATEGORICAL_ENCODERS is replaced by the result of
    its encoder's ``transform``. Whatever ``transform`` raises for values it
    cannot handle propagates to the caller (presumably ``ValueError`` for
    unseen labels if these are scikit-learn LabelEncoders -- TODO confirm).
    """
    encoded = frame.copy()
    for column, encoder in CATEGORICAL_ENCODERS.items():
        encoded[column] = encoder.transform(encoded[column])
    return encoded


def _plot_kpi_counts(data, feature, title):
    """Render a count plot of *feature* in *data*, split by the target column."""
    fig, ax = plt.subplots(figsize=(14, 8))
    # Draw on the explicit axes rather than pyplot's implicit "current" axes.
    sns.countplot(x=feature, hue=TARGET_COLUMN, data=data, palette='viridis', ax=ax)
    ax.set_title(title)
    ax.set_xlabel(feature)
    ax.set_ylabel('Number of Employees')
    st.pyplot(fig)
    # The script reruns on every interaction; close the figure so figures
    # do not pile up in pyplot's global registry.
    plt.close(fig)


# Tab 1: Predict KPIs
if st.session_state.tab_selected == 0:
    st.header('Predict KPIs')

    # User input form
    department = st.selectbox('Department', department_encoder.classes_)
    region = st.selectbox('Region', region_encoder.classes_)
    education = st.selectbox('Education', education_encoder.classes_)
    gender = st.radio('Gender', gender_encoder.classes_)
    recruitment_channel = st.selectbox('Recruitment Channel', recruitment_channel_encoder.classes_)
    no_of_trainings = st.slider('Number of Trainings', 1, 10, 1)
    age = st.slider('Age', 18, 60, 30)
    previous_year_rating = st.slider('Previous Year Rating', 1.0, 5.0, 3.0)
    length_of_service = st.slider('Length of Service', 1, 20, 5)
    awards_won = st.checkbox('Awards Won')
    avg_training_score = st.slider('Average Training Score', 40, 100, 70)

    # One-row frame holding the user's input, in FEATURE_COLUMNS order.
    user_input = pd.DataFrame({
        'department': [department],
        'region': [region],
        'education': [education],
        'gender': [gender],
        'recruitment_channel': [recruitment_channel],
        'no_of_trainings': [no_of_trainings],
        'age': [age],
        'previous_year_rating': [previous_year_rating],
        'length_of_service': [length_of_service],
        'awards_won': [1 if awards_won else 0],
        'avg_training_score': [avg_training_score],
    })

    # Encode the categorical columns, then predict.
    prediction = model.predict(_encode_categoricals(user_input))

    # Display result
    st.subheader('Prediction Result:')
    st.write('KPIs_met_more_than_80:', prediction[0])

# Tab 2: Visualize Data
elif st.session_state.tab_selected == 1:
    st.header('Visualize Data')

    # Column to filter and group by.
    condition_feature = st.selectbox('Select Condition Feature:', df.columns)

    # Offer every distinct value of that column plus a "Select All" shortcut.
    distinct_values = df[condition_feature].unique().tolist()
    condition_values = st.multiselect(
        'Select Condition Values:', ['Select All'] + distinct_values
    )

    # "Select All" expands to every distinct value.
    if 'Select All' in condition_values:
        condition_values = distinct_values

    # Nothing is plotted until at least one value has been chosen.
    if condition_values:
        filtered_df = df[df[condition_feature].isin(condition_values)]
        _plot_kpi_counts(
            filtered_df, condition_feature, 'Number of Employees based on KPIs'
        )

# Tab 3: Predict from CSV
elif st.session_state.tab_selected == 2:
    st.header('Predict from CSV')

    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])

    if uploaded_file is not None:
        # Rows with any missing value are discarded before prediction.
        csv_df_org = pd.read_csv(uploaded_file).dropna()

        missing_columns = [
            column for column in FEATURE_COLUMNS if column not in csv_df_org.columns
        ]

        if missing_columns:
            # The upload is user-supplied: report schema problems instead of
            # failing with a traceback.
            st.error(
                'The uploaded CSV is missing required columns: '
                + ', '.join(missing_columns)
            )
        elif csv_df_org.empty:
            st.warning('The uploaded CSV has no complete rows to predict on.')
        else:
            try:
                # Selecting FEATURE_COLUMNS drops 'employee_id' and any other
                # extra column (e.g. an existing target column) and fixes the
                # column order to match the single-prediction form.
                csv_df = _encode_categoricals(csv_df_org[FEATURE_COLUMNS])
            except ValueError as exc:
                # Raised when a categorical value cannot be encoded.
                st.error(f'Could not encode the uploaded data: {exc}')
            else:
                predictions = model.predict(csv_df)

                # Attach predictions to the (un-encoded) uploaded rows.
                csv_df_org[TARGET_COLUMN] = predictions

                st.subheader('Predicted Results:')
                st.write(csv_df_org)

                # Visualize predictions grouped by a user-selected column.
                st.subheader('Visualize Predictions')
                feature_for_visualization = st.selectbox(
                    'Select Feature for Visualization:', csv_df_org.columns
                )
                _plot_kpi_counts(
                    csv_df_org,
                    feature_for_visualization,
                    f'Number of Employees based on KPIs - {feature_for_visualization}',
                )
|