Spaces:

matsammut
/

ICS5110-Applied_ML

Sleeping

App Files Files Community

matsammut committed on Jan 19

Commit

fbfa266

verified ·

1 Parent(s): 73f0a1f

Update app.py

Browse files

Files changed (1) hide show

app.py +13 -34

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ import hdbscan
 # # Define the prediction function
-def predict_ann(age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
     # columns = {
     # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
     # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
@@ -23,7 +23,7 @@ def predict_ann(age, workclass, education, marital_status, occupation, relations
     "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
     "hours-per-week":[hours_per_week], "native-country":[native_country]}
     df = pd.DataFrame(data=columns)
-    fixed_features = cleaning_features(df,race)
     print(fixed_features)
     # with open('ann_model.pkl', 'rb') as ann_model_file:
     #     ann_model = pickle.load(ann_model_file)
@@ -33,7 +33,7 @@ def predict_ann(age, workclass, education, marital_status, occupation, relations
     # prediction = 1
     return "Income >50K" if prediction == 1 else "Income <=50K"
-def predict_rf(age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
     # columns = {
     # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
     # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
@@ -43,7 +43,7 @@ def predict_rf(age, workclass, education, marital_status, occupation, relationsh
     "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
     "hours-per-week":[hours_per_week], "native-country":[native_country]}
     df = pd.DataFrame(data=columns)
-    fixed_features = cleaning_features(df,race)
     print(fixed_features)
     # with open('ann_model.pkl', 'rb') as ann_model_file:
     #     ann_model = pickle.load(ann_model_file)
@@ -53,7 +53,7 @@ def predict_rf(age, workclass, education, marital_status, occupation, relationsh
     # prediction = 1
     return "Income >50K" if prediction == 1 else "Income <=50K"
-def predict_hb(age, workclass, education, marital_status, occupation, relationship, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
     # columns = {
     # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
     # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
@@ -63,7 +63,7 @@ def predict_hb(age, workclass, education, marital_status, occupation, relationsh
     "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
     "hours-per-week":[hours_per_week], "native-country":[native_country]}
     df = pd.DataFrame(data=columns)
-    fixed_features = cleaning_features(df,race)
     print(fixed_features)
     # with open('ann_model.pkl', 'rb') as ann_model_file:
     #     ann_model = pickle.load(ann_model_file)
@@ -75,7 +75,7 @@ def predict_hb(age, workclass, education, marital_status, occupation, relationsh
     return f"Predicted Cluster (HDBSCAN): {prediction}"
-def cleaning_features(data,race):
     # with open('race_onehot_encoder.pkl', 'rb') as enc_file:
     #     encoder = pickle.load(enc_file)
@@ -135,6 +135,12 @@ def cleaning_features(data,race):
     data = data.drop(columns=['race'])
     data = pca(data)
     return data
 # def pca(data):
@@ -192,11 +198,6 @@ ann_inputs = [
              "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
             label="Education"
         ),
-        gr.Dropdown(
-            ["Married-civ-spouse", "Divorced", "Never-married", "Separated",
-             "Widowed", "Married-spouse-absent", "Married-AF-spouse"],
-            label="Marital Status"
-        ),
         gr.Dropdown(
             ["Tech-support", "Craft-repair", "Other-service", "Sales",
              "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
@@ -205,10 +206,6 @@ ann_inputs = [
              "Armed-Forces"],
             label="Occupation"
         ),
-        gr.Dropdown(
-            ["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
-            label="Relationship"
-        ),
         gr.Dropdown(
             ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
             label="Race"
@@ -238,11 +235,6 @@ rf_inputs = [
              "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
             label="Education"
         ),
-        gr.Dropdown(
-            ["Married-civ-spouse", "Divorced", "Never-married", "Separated",
-             "Widowed", "Married-spouse-absent", "Married-AF-spouse"],
-            label="Marital Status"
-        ),
         gr.Dropdown(
             ["Tech-support", "Craft-repair", "Other-service", "Sales",
              "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
@@ -251,10 +243,6 @@ rf_inputs = [
              "Armed-Forces"],
             label="Occupation"
         ),
-        gr.Dropdown(
-            ["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
-            label="Relationship"
-        ),
         gr.Dropdown(
             ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
             label="Race"
@@ -284,11 +272,6 @@ hbd_inputs = [
              "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
             label="Education"
         ),
-        gr.Dropdown(
-            ["Married-civ-spouse", "Divorced", "Never-married", "Separated",
-             "Widowed", "Married-spouse-absent", "Married-AF-spouse"],
-            label="Marital Status"
-        ),
         gr.Dropdown(
             ["Tech-support", "Craft-repair", "Other-service", "Sales",
              "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
@@ -297,10 +280,6 @@ hbd_inputs = [
              "Armed-Forces"],
             label="Occupation"
         ),
-        gr.Dropdown(
-            ["Wife", "Husband", "Own-child", "Unmarried", "Other-relative", "Not-in-family"],
-            label="Relationship"
-        ),
         gr.Dropdown(
             ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
             label="Race"

 # # Define the prediction function
+def predict_ann(age, workclass, education, occupation, race, gender, capital_gain, capital_loss, hours_per_week, native_country):
     # columns = {
     # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
     # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
     "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
     "hours-per-week":[hours_per_week], "native-country":[native_country]}
     df = pd.DataFrame(data=columns)
+    fixed_features = cleaning_features(df,race,False)
     print(fixed_features)
     # with open('ann_model.pkl', 'rb') as ann_model_file:
     #     ann_model = pickle.load(ann_model_file)
     # prediction = 1
     return "Income >50K" if prediction == 1 else "Income <=50K"
+def predict_rf(age, workclass, education,  occupation,  race, gender, capital_gain, capital_loss, hours_per_week, native_country):
     # columns = {
     # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
     # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
     "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
     "hours-per-week":[hours_per_week], "native-country":[native_country]}
     df = pd.DataFrame(data=columns)
+    fixed_features = cleaning_features(df,race,False)
     print(fixed_features)
     # with open('ann_model.pkl', 'rb') as ann_model_file:
     #     ann_model = pickle.load(ann_model_file)
     # prediction = 1
     return "Income >50K" if prediction == 1 else "Income <=50K"
+def predict_hb(age, workclass, education,  occupation,  race, gender, capital_gain, capital_loss, hours_per_week, native_country):
     # columns = {
     # "age": [age], "workclass":[workclass], "educational-num":[education], "marital-status":[marital_status], "occupation":[occupation],
     # "relationship":[relationship], "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
     "race":[race], "gender":[gender], "capital-gain":[capital_gain], "capital-loss":[capital_loss],
     "hours-per-week":[hours_per_week], "native-country":[native_country]}
     df = pd.DataFrame(data=columns)
+    fixed_features = cleaning_features(df,race,True)
     print(fixed_features)
     # with open('ann_model.pkl', 'rb') as ann_model_file:
     #     ann_model = pickle.load(ann_model_file)
     return f"Predicted Cluster (HDBSCAN): {prediction}"
+def cleaning_features(data,race,hdbscan):
     # with open('race_onehot_encoder.pkl', 'rb') as enc_file:
     #     encoder = pickle.load(enc_file)
     data = data.drop(columns=['race'])
     data = pca(data)
+    if(hdbscan):
+        data['capital-gain'] = np.log1p(data['capital-gain'])
+        data['capital-loss'] = np.log1p(data['capital-loss'])
+        scaler = joblib.load("robust_scaler.pkl")
+        numerical_features = ['age', 'capital-gain', 'capital-loss', 'hours-per-week']
+        data[numerical_features] = scaler.transform(data[numerical_features])
     return data
 # def pca(data):
              "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
             label="Education"
         ),
         gr.Dropdown(
             ["Tech-support", "Craft-repair", "Other-service", "Sales",
              "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
              "Armed-Forces"],
             label="Occupation"
         ),
         gr.Dropdown(
             ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
             label="Race"
              "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
             label="Education"
         ),
         gr.Dropdown(
             ["Tech-support", "Craft-repair", "Other-service", "Sales",
              "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
              "Armed-Forces"],
             label="Occupation"
         ),
         gr.Dropdown(
             ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
             label="Race"
              "1st-4th", "10th", "Doctorate", "5th-6th", "Preschool"],
             label="Education"
         ),
         gr.Dropdown(
             ["Tech-support", "Craft-repair", "Other-service", "Sales",
              "Exec-managerial", "Prof-specialty", "Handlers-cleaners",
              "Armed-Forces"],
             label="Occupation"
         ),
         gr.Dropdown(
             ["White", "Black", "Asian-Pac-Islander", "Amer-Indian-Eskimo", "Other"],
             label="Race"