romanbredehoft-zama
committed on
Commit
·
a241bb3
1
Parent(s):
993f2a6
Impose correct column order in pre-processing
Browse files
- backend.py +6 -0
- deployment_files/client.zip +2 -2
- deployment_files/server.zip +2 -2
- development.py +15 -5
- settings.py +9 -1
- utils/pre_processing.py +1 -7
backend.py
CHANGED
@@ -21,6 +21,8 @@ from settings import (
|
|
21 |
PRE_PROCESSOR_USER_PATH,
|
22 |
PRE_PROCESSOR_THIRD_PARTY_PATH,
|
23 |
CLIENT_TYPES,
|
|
|
|
|
24 |
)
|
25 |
|
26 |
from utils.client_server_interface import MultiInputsFHEModelClient
|
@@ -270,6 +272,8 @@ def pre_process_encrypt_send_user(client_id, *inputs):
|
|
270 |
"Housing_type": [housing_type],
|
271 |
})
|
272 |
|
|
|
|
|
273 |
preprocessed_user_inputs = PRE_PROCESSOR_USER.transform(user_inputs)
|
274 |
|
275 |
return _encrypt_send(client_id, preprocessed_user_inputs, "user")
|
@@ -311,6 +315,8 @@ def pre_process_encrypt_send_third_party(client_id, *inputs):
|
|
311 |
"Years_employed": [years_salaried],
|
312 |
})
|
313 |
|
|
|
|
|
314 |
preprocessed_third_party_inputs = PRE_PROCESSOR_THIRD_PARTY.transform(third_party_inputs)
|
315 |
|
316 |
return _encrypt_send(client_id, preprocessed_third_party_inputs, "third_party")
|
|
|
21 |
PRE_PROCESSOR_USER_PATH,
|
22 |
PRE_PROCESSOR_THIRD_PARTY_PATH,
|
23 |
CLIENT_TYPES,
|
24 |
+
USER_COLUMNS,
|
25 |
+
THIRD_PARTY_COLUMNS,
|
26 |
)
|
27 |
|
28 |
from utils.client_server_interface import MultiInputsFHEModelClient
|
|
|
272 |
"Housing_type": [housing_type],
|
273 |
})
|
274 |
|
275 |
+
user_inputs = user_inputs.reindex(USER_COLUMNS, axis=1)
|
276 |
+
|
277 |
preprocessed_user_inputs = PRE_PROCESSOR_USER.transform(user_inputs)
|
278 |
|
279 |
return _encrypt_send(client_id, preprocessed_user_inputs, "user")
|
|
|
315 |
"Years_employed": [years_salaried],
|
316 |
})
|
317 |
|
318 |
+
third_party_inputs = third_party_inputs.reindex(THIRD_PARTY_COLUMNS, axis=1)
|
319 |
+
|
320 |
preprocessed_third_party_inputs = PRE_PROCESSOR_THIRD_PARTY.transform(third_party_inputs)
|
321 |
|
322 |
return _encrypt_send(client_id, preprocessed_third_party_inputs, "third_party")
|
deployment_files/client.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9c0a655d225d0f31642c20c8f3e5537505b6b6904ad8af7636631024cf6e18b6
|
3 |
+
size 76383
|
deployment_files/server.zip
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5570f7dfda2d5ced4a6bd411d9d2eba67b8bcbd523efac803be66abd4368a99
|
3 |
+
size 3321
|
development.py
CHANGED
@@ -9,10 +9,20 @@ from sklearn.model_selection import train_test_split
|
|
9 |
from sklearn.metrics import accuracy_score
|
10 |
from imblearn.over_sampling import SMOTE
|
11 |
|
12 |
-
from settings import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
from utils.client_server_interface import MultiInputsFHEModelDev
|
14 |
from utils.model import MultiInputXGBClassifier
|
15 |
-
from utils.pre_processing import get_pre_processors
|
16 |
|
17 |
|
18 |
def get_processed_multi_inputs(data):
|
@@ -39,9 +49,9 @@ data_y = data.pop("Target").copy()
|
|
39 |
data_x = data.copy()
|
40 |
|
41 |
# Get data from all parties
|
42 |
-
|
43 |
-
data_bank =
|
44 |
-
|
45 |
|
46 |
# Feature engineer the data
|
47 |
pre_processor_user, pre_processor_third_party = get_pre_processors()
|
|
|
9 |
from sklearn.metrics import accuracy_score
|
10 |
from imblearn.over_sampling import SMOTE
|
11 |
|
12 |
+
from settings import (
|
13 |
+
DEPLOYMENT_PATH,
|
14 |
+
RANDOM_STATE,
|
15 |
+
DATA_PATH,
|
16 |
+
INPUT_SLICES,
|
17 |
+
PRE_PROCESSOR_USER_PATH,
|
18 |
+
PRE_PROCESSOR_THIRD_PARTY_PATH,
|
19 |
+
USER_COLUMNS,
|
20 |
+
BANK_COLUMNS,
|
21 |
+
THIRD_PARTY_COLUMNS,
|
22 |
+
)
|
23 |
from utils.client_server_interface import MultiInputsFHEModelDev
|
24 |
from utils.model import MultiInputXGBClassifier
|
25 |
+
from utils.pre_processing import get_pre_processors
|
26 |
|
27 |
|
28 |
def get_processed_multi_inputs(data):
|
|
|
49 |
data_x = data.copy()
|
50 |
|
51 |
# Get data from all parties
|
52 |
+
data_user = data_x[USER_COLUMNS].copy()
|
53 |
+
data_bank = data_x[BANK_COLUMNS].copy()
|
54 |
+
data_third_party = data_x[THIRD_PARTY_COLUMNS].copy()
|
55 |
|
56 |
# Feature engineer the data
|
57 |
pre_processor_user, pre_processor_third_party = get_pre_processors()
|
settings.py
CHANGED
@@ -29,7 +29,7 @@ SERVER_URL = "http://localhost:8000/"
|
|
29 |
# files
|
30 |
DATA_PATH = "data/data.csv"
|
31 |
|
32 |
-
#
|
33 |
RANDOM_STATE = 0
|
34 |
INITIAL_INPUT_SHAPE = (1, 49)
|
35 |
|
@@ -45,6 +45,14 @@ INPUT_SLICES = {
|
|
45 |
"third_party": slice(43, 49), # Third position: start from n_feature_user + n_feature_bank
|
46 |
}
|
47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
_data = pandas.read_csv(DATA_PATH, encoding="utf-8")
|
49 |
|
50 |
def get_min_max(data, column):
|
|
|
29 |
# files
|
30 |
DATA_PATH = "data/data.csv"
|
31 |
|
32 |
+
# Development settings
|
33 |
RANDOM_STATE = 0
|
34 |
INITIAL_INPUT_SHAPE = (1, 49)
|
35 |
|
|
|
45 |
"third_party": slice(43, 49), # Third position: start from n_feature_user + n_feature_bank
|
46 |
}
|
47 |
|
48 |
+
USER_COLUMNS = [
|
49 |
+
'Own_car', 'Own_property', 'Work_phone', 'Phone', 'Email', 'Num_children', 'Household_size',
|
50 |
+
'Total_income', 'Age', 'Income_type', 'Education_type', 'Family_status', 'Housing_type',
|
51 |
+
'Occupation_type',
|
52 |
+
]
|
53 |
+
BANK_COLUMNS = ["Account_length"]
|
54 |
+
THIRD_PARTY_COLUMNS = ["Years_employed", "Salaried"]
|
55 |
+
|
56 |
_data = pandas.read_csv(DATA_PATH, encoding="utf-8")
|
57 |
|
58 |
def get_min_max(data, column):
|
utils/pre_processing.py
CHANGED
@@ -83,10 +83,4 @@ def get_pre_processors():
|
|
83 |
verbose_feature_names_out=False,
|
84 |
)
|
85 |
|
86 |
-
return pre_processor_user, pre_processor_third_party
|
87 |
-
|
88 |
-
|
89 |
-
def select_and_pop_features(data, columns):
|
90 |
-
new_data = data[columns].copy()
|
91 |
-
data.drop(columns, axis=1, inplace=True)
|
92 |
-
return new_data
|
|
|
83 |
verbose_feature_names_out=False,
|
84 |
)
|
85 |
|
86 |
+
return pre_processor_user, pre_processor_third_party
|
|
|
|
|
|
|
|
|
|
|
|