Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -51,16 +51,29 @@ def cleaning_features(data):
|
|
51 |
data = pca(data)
|
52 |
return data
|
53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
def pca(data):
|
55 |
-
encoder =
|
56 |
-
|
|
|
57 |
encoded_columns_df = pd.DataFrame(one_hot_encoded, columns=encoder.get_feature_names_out())
|
58 |
-
|
59 |
-
|
60 |
-
pca_columns = [f'pca_component_{i+1}' for i in range(10)]
|
61 |
pca_df = pd.DataFrame(pca_result_net, columns=pca_columns)
|
62 |
-
data = data.drop(columns=['workclass', 'occupation'], axis=1)
|
63 |
-
data = pd.concat([data, pca_df], axis=1)
|
64 |
return data
|
65 |
|
66 |
def hbdscan_tranform(df_transformed):
|
|
|
51 |
data = pca(data)
|
52 |
return data
|
53 |
|
54 |
+
# def pca(data):
|
55 |
+
# encoder = OneHotEncoder(sparse_output=False)
|
56 |
+
# one_hot_encoded = encoder.fit_transform(data[['workclass', 'occupation']])
|
57 |
+
# encoded_columns_df = pd.DataFrame(one_hot_encoded, columns=encoder.get_feature_names_out())
|
58 |
+
# pca_net = PCA(n_components=10)
|
59 |
+
# pca_result_net = pca_net.fit_transform(encoded_columns_df)
|
60 |
+
# pca_columns = [f'pca_component_{i+1}' for i in range(10)]
|
61 |
+
# pca_df = pd.DataFrame(pca_result_net, columns=pca_columns)
|
62 |
+
# data = data.drop(columns=['workclass', 'occupation'], axis=1) #remove the original columns
|
63 |
+
# data = pd.concat([data, pca_df], axis=1)
|
64 |
+
# return data
|
65 |
+
|
66 |
+
|
67 |
def pca(data):
    """Replace 'workclass' and 'occupation' with pre-fitted PCA components.

    The two categorical columns are one-hot encoded with the encoder stored
    in ``onehot_encoder.joblib`` and projected with the PCA model stored in
    ``pca.joblib``. Both artifacts are only applied here (``transform``),
    never re-fitted.

    Args:
        data: pandas DataFrame containing 'workclass' and 'occupation'
            columns.

    Returns:
        A new DataFrame without 'workclass' and 'occupation', extended with
        columns ``pca_component_1`` .. ``pca_component_N`` where N is
        ``pca_model.n_components_``. Row order and index match ``data``.
    """
    categorical_columns = ['workclass', 'occupation']

    # NOTE: joblib.load unpickles its input; these files must be trusted.
    encoder = joblib.load('onehot_encoder.joblib')
    pca_model = joblib.load('pca.joblib')

    one_hot_encoded = encoder.transform(data[categorical_columns])
    # An encoder saved with sparse output returns a SciPy sparse matrix,
    # which pd.DataFrame cannot wrap as a regular numeric table.
    if hasattr(one_hot_encoded, 'toarray'):
        one_hot_encoded = one_hot_encoded.toarray()

    # Build the derived frames on data.index: pd.concat(axis=1) aligns rows
    # by index label, so a fresh RangeIndex would misalign (and NaN-pad)
    # the result whenever `data` does not carry a default 0..n-1 index.
    encoded_columns_df = pd.DataFrame(
        one_hot_encoded,
        columns=encoder.get_feature_names_out(),
        index=data.index,
    )
    pca_result_net = pca_model.transform(encoded_columns_df)
    pca_columns = [
        f'pca_component_{i+1}' for i in range(pca_model.n_components_)
    ]
    pca_df = pd.DataFrame(pca_result_net, columns=pca_columns, index=data.index)

    data = data.drop(columns=categorical_columns)
    data = pd.concat([data, pca_df], axis=1)
    return data
|
78 |
|
79 |
def hbdscan_tranform(df_transformed):
|