"""Streamlit app for predicting and exploring the ``KPIs_met_more_than_80`` flag.

The app offers three views, chosen with a radio button:

* Predict KPIs     -- predict for a single employee entered through a form.
* Visualize Data   -- count plot of the bundled dataset, filtered by one column.
* Predict from CSV -- predict for every complete row of an uploaded CSV file.
"""
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st

MODEL_PATH = 'model_kpi_706.pkl'
DATA_PATH = 'Uncleaned_employees_final_dataset.csv'
ID_COLUMN = 'employee_id'
TARGET_COLUMN = 'KPIs_met_more_than_80'

# Load model and encoders.
# NOTE(security): pickle.load can execute arbitrary code stored in the file;
# only deploy a model file that comes from a trusted source.
with open(MODEL_PATH, 'rb') as file:
    (
        model,
        department_encoder,
        region_encoder,
        education_encoder,
        gender_encoder,
        recruitment_channel_encoder,
    ) = pickle.load(file)

# Column name -> fitted encoder, in the order the columns are encoded.
# Presumably scikit-learn LabelEncoder instances (they expose ``classes_``
# and ``transform``) -- confirm against the training script.
CATEGORICAL_ENCODERS = {
    'department': department_encoder,
    'region': region_encoder,
    'education': education_encoder,
    'gender': gender_encoder,
    'recruitment_channel': recruitment_channel_encoder,
}

# Dataset used by the "Visualize Data" tab; the id column is not plotted.
df = pd.read_csv(DATA_PATH)
df = df.drop(ID_COLUMN, axis=1)


def _encode_categoricals(frame):
    """Return a copy of *frame* with every categorical column encoded.

    Raises whatever the encoder's ``transform`` raises for values it was
    not fitted on, and ``KeyError`` if a categorical column is missing.
    """
    encoded = frame.copy()
    for column, encoder in CATEGORICAL_ENCODERS.items():
        encoded[column] = encoder.transform(encoded[column])
    return encoded


def _show_kpi_countplot(data, feature, title):
    """Render a count plot of *feature* split by the KPI flag."""
    fig, ax = plt.subplots(figsize=(14, 8))
    # Draw on the explicit axes instead of relying on pyplot's current figure.
    sns.countplot(x=feature, hue=TARGET_COLUMN, data=data,
                  palette='viridis', ax=ax)
    ax.set_title(title)
    ax.set_xlabel(feature)
    ax.set_ylabel('Number of Employees')
    st.pyplot(fig)
    # Streamlit re-runs the script on every interaction; close the figure so
    # matplotlib does not keep one more open figure per rerun.
    plt.close(fig)


def _render_predict_tab():
    """Tab 1: collect one employee's attributes and show the prediction."""
    st.header('Predict KPIs')

    # User input form.
    department = st.selectbox('Department', department_encoder.classes_)
    region = st.selectbox('Region', region_encoder.classes_)
    education = st.selectbox('Education', education_encoder.classes_)
    gender = st.radio('Gender', gender_encoder.classes_)
    recruitment_channel = st.selectbox('Recruitment Channel',
                                       recruitment_channel_encoder.classes_)
    no_of_trainings = st.slider('Number of Trainings', 1, 10, 1)
    age = st.slider('Age', 18, 60, 30)
    previous_year_rating = st.slider('Previous Year Rating', 1.0, 5.0, 3.0)
    length_of_service = st.slider('Length of Service', 1, 20, 5)
    awards_won = st.checkbox('Awards Won')
    avg_training_score = st.slider('Average Training Score', 40, 100, 70)

    # Single-row frame; the column order here is the order passed to the model.
    user_input = pd.DataFrame({
        'department': [department],
        'region': [region],
        'education': [education],
        'gender': [gender],
        'recruitment_channel': [recruitment_channel],
        'no_of_trainings': [no_of_trainings],
        'age': [age],
        'previous_year_rating': [previous_year_rating],
        'length_of_service': [length_of_service],
        'awards_won': [1 if awards_won else 0],
        'avg_training_score': [avg_training_score],
    })

    prediction = model.predict(_encode_categoricals(user_input))

    st.subheader('Prediction Result:')
    st.write('KPIs_met_more_than_80:', prediction[0])


def _render_visualize_tab():
    """Tab 2: count plot of the bundled dataset filtered on one column."""
    st.header('Visualize Data')

    condition_feature = st.selectbox('Select Condition Feature:', df.columns)
    all_values = df[condition_feature].unique().tolist()

    condition_values = st.multiselect('Select Condition Values:',
                                      ['Select All'] + all_values)
    # 'Select All' expands to every value present in the chosen column.
    if 'Select All' in condition_values:
        condition_values = all_values

    if not condition_values:
        return

    filtered_df = df[df[condition_feature].isin(condition_values)]
    _show_kpi_countplot(filtered_df, condition_feature,
                        'Number of Employees based on KPIs')


def _render_csv_tab():
    """Tab 3: predict for every complete row of an uploaded CSV file."""
    st.header('Predict from CSV')

    uploaded_file = st.file_uploader("Upload CSV file", type=["csv"])
    if uploaded_file is None:
        return

    # Rows with any missing value are discarded before prediction.
    csv_df_org = pd.read_csv(uploaded_file).dropna()
    if csv_df_org.empty:
        st.warning('The uploaded file contains no complete rows to predict on.')
        return

    missing_columns = [column for column in CATEGORICAL_ENCODERS
                       if column not in csv_df_org.columns]
    if missing_columns:
        st.error('The uploaded file is missing required columns: '
                 + ', '.join(missing_columns))
        return

    # Neither the id nor an already-present target column is a model feature;
    # tolerate files that lack either of them.
    csv_df = csv_df_org.drop(columns=[ID_COLUMN, TARGET_COLUMN],
                             errors='ignore')
    try:
        csv_df = _encode_categoricals(csv_df)
    except ValueError as err:
        # Raised e.g. for category values the encoders were not fitted on.
        st.error(f'Could not encode the categorical columns: {err}')
        return

    predictions = model.predict(csv_df)

    # Attach the predictions to the human-readable (un-encoded) rows.
    csv_df_org[TARGET_COLUMN] = predictions

    st.subheader('Predicted Results:')
    st.write(csv_df_org)

    st.subheader('Visualize Predictions')
    feature_for_visualization = st.selectbox(
        'Select Feature for Visualization:', csv_df_org.columns)
    _show_kpi_countplot(
        csv_df_org,
        feature_for_visualization,
        f'Number of Employees based on KPIs - {feature_for_visualization}',
    )


# Streamlit App
st.title('Employee KPIs App')

# Remember the selected tab (as an index into ``tabs``) across reruns.
if 'tab_selected' not in st.session_state:
    st.session_state.tab_selected = 0

tabs = ['Predict KPIs', 'Visualize Data', 'Predict from CSV']
selected_tab = st.radio('Select Tab:', tabs,
                        index=st.session_state.tab_selected)

# The radio returns the label while the session state stores the index, so
# always convert; comparing the two directly can never be equal.
st.session_state.tab_selected = tabs.index(selected_tab)

if st.session_state.tab_selected == 0:
    _render_predict_tab()
elif st.session_state.tab_selected == 1:
    _render_visualize_tab()
elif st.session_state.tab_selected == 2:
    _render_csv_tab()