Spaces:

huntrezz
/

LACityEmployeePayPredictor

Sleeping

App Files Files Community

huntrezz committed on Sep 18

Commit

137e5e0

•

1 Parent(s): b5954ff

Upload 2 files

Browse files

Files changed (2) hide show

LosAngelesPayPredictor.py +127 -0
requirements.txt +5 -0

LosAngelesPayPredictor.py ADDED Viewed

	@@ -0,0 +1,127 @@

+import pandas as pd
+import numpy as np
+from sklearn.preprocessing import StandardScaler, OneHotEncoder
+from sklearn.model_selection import train_test_split
+from fastai.tabular.all import *
+from sklearn.ensemble import VotingRegressor
+from sklearn.linear_model import LinearRegression
+from sklearn.tree import DecisionTreeRegressor
+from sklearn.base import BaseEstimator, RegressorMixin
+from sklearn.compose import ColumnTransformer
+from sklearn.pipeline import Pipeline
+import gradio as gr
+df = pd.read_csv('City_Employee_Payroll__Current__20240915.csv', low_memory=False)
+df = df.replace([np.inf, -np.inf], np.nan)
+cat_names = ['EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'GENDER', 'ETHNICITY', 'JOB_TITLE', 'DEPARTMENT_NO']
+cont_names = ['PAY_YEAR', 'REGULAR_PAY', 'OVERTIME_PAY', 'ALL_OTHER_PAY']
+df['PAY_RATIO'] = df['REGULAR_PAY'] / (df['OVERTIME_PAY'] + df['ALL_OTHER_PAY'] + 1)
+df['TOTAL_NON_REGULAR_PAY'] = df['OVERTIME_PAY'] + df['ALL_OTHER_PAY']
+cont_names.extend(['PAY_RATIO', 'TOTAL_NON_REGULAR_PAY'])
+X = df[cat_names + cont_names].copy()
+y = df['TOTAL_PAY'].copy()
+for col in cat_names:
+    X[col] = X[col].fillna('Unknown')
+X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+X_train_sample, _, y_train_sample, _ = train_test_split(X_train, y_train, train_size=0.3, random_state=42)
+to = TabularPandas(df, procs=[Categorify, FillMissing, Normalize], cat_names=cat_names, cont_names=cont_names, y_names='TOTAL_PAY', splits=RandomSplitter(valid_pct=0.2)(range_of(df)))
+dls = to.dataloaders(bs=64)
+learn = tabular_learner(dls, layers=[200, 100, 50], metrics=rmse)
+learn.fit_one_cycle(9)
+class FastAIWrapper(BaseEstimator, RegressorMixin):
+    def __init__(self, learn):
+        self.learn = learn
+    def fit(self, X, y):
+        return self
+    def predict(self, X):
+        dl = self.learn.dls.test_dl(X)
+        preds, _ = self.learn.get_preds(dl=dl)
+        return preds.numpy().flatten()
+preprocessor = ColumnTransformer(
+    transformers=[
+        ('num', StandardScaler(), cont_names),
+        ('cat', OneHotEncoder(drop='first', sparse=False, handle_unknown='ignore'), cat_names)
+    ])
+model1 = FastAIWrapper(learn)
+model2 = Pipeline([('preprocessor', preprocessor), ('regressor', LinearRegression())])
+model3 = Pipeline([('preprocessor', preprocessor), ('regressor', DecisionTreeRegressor())])
+ensemble = VotingRegressor(
+    estimators=[('fastai', model1), ('lr', model2), ('dt', model3)],
+    weights=[2, 1, 1]
+)
+ensemble.fit(X_train_sample, y_train_sample)
+def predict_total_pay(gender, job_title, ethnicity):
+    sample = pd.DataFrame({
+        'GENDER': [gender],
+        'JOB_TITLE': [job_title],
+        'ETHNICITY': [ethnicity],
+    })
+    group = df[(df['GENDER'] == gender) & (df['JOB_TITLE'] == job_title) & (df['ETHNICITY'] == ethnicity)]
+    if len(group) > 0:
+        sample['EMPLOYMENT_TYPE'] = [group['EMPLOYMENT_TYPE'].mode().iloc[0]]
+        sample['JOB_STATUS'] = [group['JOB_STATUS'].mode().iloc[0]]
+        sample['MOU'] = [group['MOU'].mode().iloc[0]]
+        sample['DEPARTMENT_NO'] = [group['DEPARTMENT_NO'].mode().iloc[0]]
+        sample['REGULAR_PAY'] = [group['REGULAR_PAY'].mean()]
+        sample['OVERTIME_PAY'] = [group['OVERTIME_PAY'].mean()]
+        sample['ALL_OTHER_PAY'] = [group['ALL_OTHER_PAY'].mean()]
+    else:
+        job_group = df[df['JOB_TITLE'] == job_title]
+        if len(job_group) > 0:
+            sample['EMPLOYMENT_TYPE'] = [job_group['EMPLOYMENT_TYPE'].mode().iloc[0]]
+            sample['JOB_STATUS'] = [job_group['JOB_STATUS'].mode().iloc[0]]
+            sample['MOU'] = [job_group['MOU'].mode().iloc[0]]
+            sample['DEPARTMENT_NO'] = [job_group['DEPARTMENT_NO'].mode().iloc[0]]
+            sample['REGULAR_PAY'] = [job_group['REGULAR_PAY'].mean()]
+            sample['OVERTIME_PAY'] = [job_group['OVERTIME_PAY'].mean()]
+            sample['ALL_OTHER_PAY'] = [job_group['ALL_OTHER_PAY'].mean()]
+        else:
+            sample['EMPLOYMENT_TYPE'] = [df['EMPLOYMENT_TYPE'].mode().iloc[0]]
+            sample['JOB_STATUS'] = [df['JOB_STATUS'].mode().iloc[0]]
+            sample['MOU'] = [df['MOU'].mode().iloc[0]]
+            sample['DEPARTMENT_NO'] = [df['DEPARTMENT_NO'].mode().iloc[0]]
+            sample['REGULAR_PAY'] = [df['REGULAR_PAY'].mean()]
+            sample['OVERTIME_PAY'] = [df['OVERTIME_PAY'].mean()]
+            sample['ALL_OTHER_PAY'] = [df['ALL_OTHER_PAY'].mean()]
+    sample['PAY_YEAR'] = [df['PAY_YEAR'].max()]
+    sample['PAY_RATIO'] = sample['REGULAR_PAY'] / (sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY'] + 1)
+    sample['TOTAL_NON_REGULAR_PAY'] = sample['OVERTIME_PAY'] + sample['ALL_OTHER_PAY']
+    categorical_columns = ['GENDER', 'JOB_TITLE', 'ETHNICITY', 'EMPLOYMENT_TYPE', 'JOB_STATUS', 'MOU', 'DEPARTMENT_NO']
+    for col in categorical_columns:
+        sample[col] = sample[col].astype('object')
+    prediction = ensemble.predict(sample)[0]
+    return prediction
+def gradio_predict(gender, ethnicity, job_title):
+    predicted_pay = predict_total_pay(gender, job_title, ethnicity)
+    return f"${predicted_pay:.2f}"
+genders = df['GENDER'].dropna().unique().tolist()
+ethnicities = df['ETHNICITY'].dropna().unique().tolist()
+job_titles = sorted(df['JOB_TITLE'].dropna().unique().tolist())
+iface = gr.Interface(
+    fn=gradio_predict,
+    inputs=[
+        gr.Dropdown(choices=genders, label="Gender"),
+        gr.Dropdown(choices=ethnicities, label="Ethnicity"),
+        gr.Dropdown(choices=job_titles, label="Job Title")
+    ],
+    outputs=gr.Textbox(label="Predicted Total Pay"),
+    title="LA City Employee Pay Predictor",
+    description="Predict the total pay for LA City employees based on gender, ethnicity, and job title."
+)
+iface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+pandas
+numpy
+scikit-learn
+fastai
+gradio