File size: 2,479 Bytes
1ba3f22
 
 
9a997e4
1ba3f22
 
 
 
 
c119738
1ba3f22
 
 
 
9a997e4
 
 
 
1ba3f22
 
 
 
 
 
 
 
9a997e4
 
 
 
 
c119738
9a997e4
 
c119738
 
 
 
 
 
 
 
9a997e4
 
 
 
c119738
9a997e4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8d5cb63
9a997e4
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
"All constants used in the project."

from pathlib import Path
import pandas

# Directory containing this file; every path below is anchored to it
REPO_DIR = Path(__file__).parent

# This repository's main directories
DEPLOYMENT_PATH = REPO_DIR / "deployment_files"
FHE_KEYS = REPO_DIR / ".fhe_keys"
CLIENT_FILES = REPO_DIR / "client_files"
SERVER_FILES = REPO_DIR / "server_files"

# Paths to the saved (pickled) pre-processors, stored in the deployment directory
PRE_PROCESSOR_USER_PATH = DEPLOYMENT_PATH / 'pre_processor_user.pkl'
PRE_PROCESSOR_THIRD_PARTY_PATH = DEPLOYMENT_PATH / 'pre_processor_third_party.pkl'

# Create the working directories as soon as this module is imported.
# `exist_ok=True` makes the calls harmless when the directories already exist.
# DEPLOYMENT_PATH is not created here.
FHE_KEYS.mkdir(exist_ok=True)
CLIENT_FILES.mkdir(exist_ok=True)
SERVER_FILES.mkdir(exist_ok=True)

# The server's URL (local host, port 8000)
SERVER_URL = "http://localhost:8000/" 

# Path to the data file. This is a plain relative string: unlike the paths above,
# it is not anchored at REPO_DIR.
# The data was previously cleaned using this notebook:
# https://www.kaggle.com/code/samuelcortinhas/credit-cards-data-cleaning
# Additionally, the "ID" column has been removed and the "Total_income" column has been
# adjusted so that its median value corresponds to France's 2023 median annual salary
# (22050 euros)
DATA_PATH = "data/clean_data.csv"

# Development settings
# NOTE(review): RANDOM_STATE is presumably the seed shared by the random operations of the
# project - confirm against its users.
RANDOM_STATE = 0
# NOTE(review): presumably (number of samples, number of features); 49 is also where the
# last slice of INPUT_SLICES ends - confirm.
INITIAL_INPUT_SHAPE = (1, 49)

# The client types handled by the project
CLIENT_TYPES = ["user", "bank", "third_party"]

# Index of each client type: its rank in CLIENT_TYPES
INPUT_INDEXES = {
    client_type: rank for rank, client_type in enumerate(CLIENT_TYPES)
}

# Number of features held by each client type
_N_FEATURE_USER, _N_FEATURE_BANK, _N_FEATURE_THIRD_PARTY = 42, 1, 6

# Each client type owns a contiguous range that starts where the previous one stops
_BANK_START = _N_FEATURE_USER
_THIRD_PARTY_START = _BANK_START + _N_FEATURE_BANK
INPUT_SLICES = {
    # First position: start from 0
    "user": slice(0, _BANK_START),
    # Second position: start from n_feature_user
    "bank": slice(_BANK_START, _THIRD_PARTY_START),
    # Third position: start from n_feature_user + n_feature_bank
    "third_party": slice(_THIRD_PARTY_START, _THIRD_PARTY_START + _N_FEATURE_THIRD_PARTY),
}

# Load the dataset once, when the module is imported. DATA_PATH is a relative path, so it
# is anchored at REPO_DIR (like every other path of this module) rather than resolved
# against the current working directory: the import then works from any directory.
# An absolute DATA_PATH would be left untouched by the join.
_data = pandas.read_csv(REPO_DIR / DATA_PATH, encoding="utf-8")

def get_min_max(data, column):
    """Return the bounds of a column as key arguments for Gradio's API.

    Args:
        data: The table to read from; it is indexed with `column` and the result must
            expose `min()` and `max()`.
        column: The name of the column to inspect.

    Returns:
        dict: The column's smallest and largest values, both converted with `int`, stored
            under the "minimum" and "maximum" keys.
    """
    column_values = data[column]

    bounds = {}
    bounds["minimum"] = int(column_values.min())
    bounds["maximum"] = int(column_values.max())
    return bounds

# App data bounds: the min and max values found in the dataset for each column
ACCOUNT_MIN_MAX = get_min_max(data=_data, column="Account_length")
CHILDREN_MIN_MAX = get_min_max(data=_data, column="Num_children")
INCOME_MIN_MAX = get_min_max(data=_data, column="Total_income")
AGE_MIN_MAX = get_min_max(data=_data, column="Age")
SALARIED_MIN_MAX = get_min_max(data=_data, column="Years_employed")
FAMILY_MIN_MAX = get_min_max(data=_data, column="Num_family")


def _unique_values(column):
    """Return the distinct values found in a column of the dataset, as a list."""
    return list(_data[column].unique())


# App data choices: the distinct values found in the dataset for each column
INCOME_TYPES = _unique_values("Income_type")
OCCUPATION_TYPES = _unique_values("Occupation_type")
HOUSING_TYPES = _unique_values("Housing_type")
EDUCATION_TYPES = _unique_values("Education_type")
FAMILY_STATUS = _unique_values("Family_status")