Spaces:

AfshinMA
/

Developer_Salary_Prediction_in_2024-Streamlit_App

Sleeping

App Files Files Community

AfshinMA committed on Dec 11, 2024

Commit

f663429

verified ·

1 Parent(s): 820965a

Upload 6 files

Browse files

Files changed (6) hide show

app.py +137 -0
datasets/cleaned_survey_results_public.csv +0 -0
models/CatBoostRegressor.joblib +3 -0
models/LGBMRegressor.joblib +3 -0
models/XGBoostRegressor.joblib +3 -0
requirements.txt +7 -0

app.py ADDED Viewed

	@@ -0,0 +1,137 @@

+import os
+import joblib
+import pandas as pd
+import streamlit as st
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import r2_score
+from typing import List, Dict, Any
+# Constants for directories and file names
+DIR = "C:\\Users\\Afshin\\Desktop\\10_Projects\\Project_3_Developer_Salary_Prediction\\"
+# Constants for directories and file names
+MODEL_DIR = DIR + 'models'
+DATA_DIR = DIR + 'datasets'
+DATA_FILE = 'cleaned_survey_results_public_v2.csv'
+MODEL_NAMES = [
+    #'CatBoost Regressor',
+    'XGBoost Regressor',
+    'LGBM Regressor',
+]
+def load_models(model_names: List[str]) -> Dict[str, Any]:
+    """Load machine learning models from disk."""
+    models = {}
+    for name in model_names:
+        path = os.path.join(MODEL_DIR, f"{name.replace(' ', '')}.joblib")
+        try:
+            models[name] = joblib.load(path)
+        except Exception as e:
+            st.error(f"Error loading model {name}: {str(e)}")
+    return models
+# Load models
+models = load_models(MODEL_NAMES)
+# Load dataset
+data_path = os.path.join(DATA_DIR, DATA_FILE)
+df = pd.read_csv(data_path)
+# Prepare features and target
+X = df.drop(columns=['Salary'])
+y = df['Salary']
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=123)
+# Pre-defined input choices
+input_choices = {
+    'MainBranch': df.MainBranch.unique().tolist(),
+    'Country': X.Country.unique().tolist(),
+    'EducationLevel': X.EducationLevel.unique().tolist(),
+    'RemoteWork': df.RemoteWork.unique().tolist(),
+}
+# Pre-computed statistics for default values
+default_comp = float(df.CompTotal.mean())  # Default CompTotal
+max_comp = float(df.CompTotal.max() * 1.5)
+default_years = 3.0  # Default years of experience
+max_years = float(df.YearsOfExperience.max() * 1.5)
+# Precompute predictions for training set
+y_train_predictions = {name: model.predict(X_train) for name, model in models.items()}
+def load_and_predict(sample: pd.DataFrame) -> pd.DataFrame:
+    """Predict salary using loaded models and evaluate statistics."""
+    results = []
+    for name, model in models.items():
+        try:
+            salary_pred = model.predict(sample)[0]
+            results.append({
+                'Model': name,
+                'Predicted Salary': salary_pred,
+                'R2 Score (%)': r2_score(y_train, y_train_predictions[name]) * 100,
+            })
+        except Exception as e:
+            st.error(f"Error during prediction with model {name}: {str(e)}")
+    return pd.DataFrame(results).sort_values(by='R2 Score (%)', ascending=False).reset_index(drop=True)
+# Streamlit UI setup
+st.set_page_config(page_title="Developer Salary Prediction App", page_icon="🤑", layout="wide")
+st.title("🤑 **Developer Salary Prediction**")
+# Sidebar inputs
+st.sidebar.header("Input Information")
+mainbranch = st.sidebar.selectbox("**MainBranch**", options=input_choices['MainBranch'])
+country = st.sidebar.selectbox("**Country**", options=input_choices['Country'])
+educationlevel = st.sidebar.selectbox("**Education Level**", options=input_choices['EducationLevel'])
+remotework = st.sidebar.selectbox("**Remote Work**", options=input_choices['RemoteWork'])
+comptotal = st.sidebar.number_input("**CompTotal**", min_value=0.0, max_value=max_comp, value=default_comp)
+yearsofexperience = st.sidebar.number_input("**Years of Experience**", min_value=0.0, max_value=max_years, value=default_years)
+# Handling predictions
+if st.sidebar.button(label=':rainbow[Predict Salary]'):
+    input_data = pd.DataFrame(
+        [[mainbranch, country, educationlevel, remotework, comptotal, yearsofexperience]],
+        columns=['MainBranch', 'Country', 'EducationLevel', 'RemoteWork', 'CompTotal', 'YearsOfExperience'])
+    results_df = load_and_predict(input_data)
+    if not results_df.empty:
+        st.write("### Prediction Results:")
+        st.dataframe(results_df)
+# Disclaimer Section
+st.markdown("---")
+st.text('''
+    >> Developer Salary Prediction App <<
+    This Streamlit application predicts developer salary using multiple machine learning models including LGBM, XGBoost, and Random Forest regressors.
+    Users can input developer information through a user-friendly interface, which includes fields such as country, education level, and years of experience.
+    > Features:
+        **Input Components**:
+        - **MainBranch**: Select your main area of expertise in development, such as software engineering, data science, or web development. This selection may influence salary expectations based on the branch's demand and trends.
+        - **Country**: Choose your country from the dropdown list. Regions often exhibit varying salary scales due to economic factors, the cost of living, and market demand for tech workers.
+        - **Education Level**: Indicate the highest level of education you have completed. Higher educational qualifications often correlate with higher earning potential in the tech industry.
+        - **Remote Work**: Specify whether you primarily work remotely, on-site, or in a hybrid setup. Remote work setups can affect salary offers, especially if hiring companies are based in different geographic areas.
+        - **CompTotal**: Enter your expected total compensation, which includes salary, bonuses, and other benefits. This field is crucial for setting a base for salary predictions and facilitates comparisons.
+        - **Years of Experience**: Provide the number of years you've been in a coding-related job. Generally, more years of experience are associated with higher salaries due to skill accumulation and professional development.
+        **Data Processing**:
+        - The app employs a pre-processed dataset, cleaned and prepared for model training.
+        - It utilizes features including country, education level, and years of experience for predictions.
+        - Models are loaded from disk, obtaining predictions based on user-provided input.
+        **Prediction**: The app performs predictions with loaded models and calculates performance metrics like R2 score.
+        **Results Display**: The predicted salary and model performance metrics are presented in a user-friendly format.
+    > Usage:
+       Fill out the developer information and click "Predict Salary" to derive insights on anticipated salary and model performance.
+    > Disclaimer:
+       This application serves educational purposes. Predictions are not guaranteed to be accurate.
+''')

datasets/cleaned_survey_results_public.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

models/CatBoostRegressor.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:807c04de1b414d8c59dac807b89a30fa85f961c0ff3185dbd0f19e065bcadf31
+size 347038

models/LGBMRegressor.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b589f3a59666c3f2bcc1f6642c0e29e350360c27ba5d6a447d304aea65e45cb
+size 871178

models/XGBoostRegressor.joblib ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4d4b8db58c45e903904275bb98611d5a4e08c9fc9cddbfb25c42d16dfc2b5657
+size 2205896

requirements.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+pandas
+numpy
+joblib
+scikit-learn
+lightgbm
+xgboost
+catboost