File size: 2,958 Bytes
1ba3f22
 
 
9a997e4
1ba3f22
 
 
 
74c0c8e
c119738
1ba3f22
 
 
 
74c0c8e
b47829b
74c0c8e
9a997e4
 
18ba8c1
9a997e4
 
1ba3f22
 
 
 
 
 
 
 
9a997e4
993f2a6
c119738
a241bb3
b47829b
c119738
 
 
 
 
 
 
b47829b
18ba8c1
 
 
c119738
9a997e4
74c0c8e
a241bb3
18ba8c1
a241bb3
 
 
31284a7
b47829b
a241bb3
9a997e4
 
 
 
 
 
 
 
 
 
31284a7
9a997e4
 
 
993f2a6
9a997e4
b47829b
 
 
 
9a997e4
 
 
 
 
 
b47829b
 
 
 
9a997e4
b47829b
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"All constants used in the project."

from pathlib import Path
import pandas

# Root directory of the project: the folder that contains this file
REPO_DIR = Path(__file__).parent

# Main necessary directories
FHE_KEYS = REPO_DIR / ".fhe_keys"
CLIENT_FILES = REPO_DIR / "client_files"
SERVER_FILES = REPO_DIR / "server_files"

# Deployment directory ("model" sub-folder of the deployment files)
DEPLOYMENT_PATH = REPO_DIR / "deployment_files" / "model"

# Paths targeting the saved pre-processor files
PRE_PROCESSOR_USER_PATH = DEPLOYMENT_PATH / "pre_processor_user.pkl"
PRE_PROCESSOR_BANK_PATH = DEPLOYMENT_PATH / "pre_processor_bank.pkl"
PRE_PROCESSOR_THIRD_PARTY_PATH = DEPLOYMENT_PATH / "pre_processor_third_party.pkl"

# Create the necessary directories (no error if they already exist)
for _directory in (FHE_KEYS, CLIENT_FILES, SERVER_FILES):
    _directory.mkdir(exist_ok=True)

# URL of the server
SERVER_URL = "http://localhost:8000/"

# Path to the data file (a relative path; it is not anchored to REPO_DIR here)
DATA_PATH = "data/data.csv"

# Development settings
PROCESSED_INPUT_SHAPE = (1, 39)

# Client types, in their fixed order; each one's index is its position in this list
CLIENT_TYPES = ["user", "bank", "third_party"]
INPUT_INDEXES = {client_type: position for position, client_type in enumerate(CLIENT_TYPES)}

# Contiguous ranges assigned to each client type; together they cover indexes 0 to 38
INPUT_SLICES = dict(
    user=slice(0, 36),  # starts at 0
    bank=slice(36, 37),  # starts right after the user range
    third_party=slice(37, 39),  # starts right after the user and bank ranges
)

# Fixed column order for the pre-processing steps
USER_COLUMNS = [
    "Own_car",
    "Own_property",
    "Mobile_phone",
    "Num_children",
    "Household_size",
    "Total_income",
    "Age",
    "Income_type",
    "Education_type",
    "Family_status",
    "Housing_type",
    "Occupation_type",
]
BANK_COLUMNS = ["Account_age"]
THIRD_PARTY_COLUMNS = ["Years_employed", "Employed"]

# Load the data file once, at import time. DATA_PATH is a relative path, so it is anchored to
# REPO_DIR (like the other paths of this file) rather than to the current working directory;
# otherwise importing this module from another directory fails to find the file. If DATA_PATH
# is absolute, the join below leaves it unchanged.
_data = pandas.read_csv(REPO_DIR / DATA_PATH, encoding="utf-8")

def get_min_max(data, column):
    """Return a column's bounds as keyword arguments for Gradio's API.

    Args:
        data: Object indexable by column name whose columns provide ``min()`` and ``max()``
            (the data frame loaded in this file is passed here).
        column: Name of the column to inspect.

    Returns:
        dict: ``{"minimum": ..., "maximum": ...}``, each bound converted with ``int`` (so
        fractional values are truncated toward zero).
    """
    values = data[column]
    lowest, highest = values.min(), values.max()
    return dict(minimum=int(lowest), maximum=int(highest))

# Minimum and maximum values found in the data, one dict per column
ACCOUNT_MIN_MAX = get_min_max(data=_data, column="Account_age")
CHILDREN_MIN_MAX = get_min_max(data=_data, column="Num_children")
INCOME_MIN_MAX = get_min_max(data=_data, column="Total_income")
AGE_MIN_MAX = get_min_max(data=_data, column="Age")
FAMILY_MIN_MAX = get_min_max(data=_data, column="Household_size")

# Default values
INCOME_VALUE = 12000
AGE_VALUE = 30

# Choices for the app: the distinct values found in each column of the data
INCOME_TYPES = [*_data["Income_type"].unique()]
OCCUPATION_TYPES = [*_data["Occupation_type"].unique()]
HOUSING_TYPES = [*_data["Housing_type"].unique()]
EDUCATION_TYPES = [*_data["Education_type"].unique()]
FAMILY_STATUS = [*_data["Family_status"].unique()]
YEARS_EMPLOYED_BINS = ["0-2", "2-5", "5-8", "8-11", "11-18", "18+"]

# Position of each Years_employed bin name in the ordered list above
YEARS_EMPLOYED_BIN_NAME_TO_INDEX = dict(zip(YEARS_EMPLOYED_BINS, range(len(YEARS_EMPLOYED_BINS))))

# Sanity check: the data must contain as many distinct Years_employed values as there are bins
# (only the counts are compared, not the bin names themselves)
assert len(_data["Years_employed"].unique()) == len(
    YEARS_EMPLOYED_BINS
), "Years_employed bins are not matching the expected list"