romanbredehoft-zama's picture
Add second model for optional explainability step
74c0c8e
raw
history blame
3.06 kB
"All constants used in the project."
from pathlib import Path
import pandas
# The directory of this project
REPO_DIR = Path(__file__).parent

# Main necessary directories
DEPLOYMENT_PATH = REPO_DIR / "deployment_files"
FHE_KEYS = REPO_DIR / ".fhe_keys"
CLIENT_FILES = REPO_DIR / "client_files"
SERVER_FILES = REPO_DIR / "server_files"

# All deployment directories
APPROVAL_DEPLOYMENT_PATH = DEPLOYMENT_PATH / "approval_model"
EXPLAIN_DEPLOYMENT_PATH = DEPLOYMENT_PATH / "explain_model"

# Paths targeting the pre-processor saved files
PRE_PROCESSOR_USER_PATH = DEPLOYMENT_PATH / "pre_processor_user.pkl"
PRE_PROCESSOR_BANK_PATH = DEPLOYMENT_PATH / "pre_processor_bank.pkl"
PRE_PROCESSOR_THIRD_PARTY_PATH = DEPLOYMENT_PATH / "pre_processor_third_party.pkl"

# Create the necessary directories (no error if they already exist)
FHE_KEYS.mkdir(exist_ok=True)
CLIENT_FILES.mkdir(exist_ok=True)
SERVER_FILES.mkdir(exist_ok=True)

# Store the server's URL
SERVER_URL = "http://localhost:8000/"

# Path to data file. It is anchored to REPO_DIR, like every other path in this module, so that
# it resolves to the same file whatever the current working directory is. The value is kept as
# a plain string (not a Path) so that the type of this constant does not change
DATA_PATH = str(REPO_DIR / "data" / "data.csv")

# Development settings
# The number of columns of each shape equals the end of the last slice of the matching
# *_INPUT_SLICES mapping below
APPROVAL_PROCESSED_INPUT_SHAPE = (1, 39)
EXPLAIN_PROCESSED_INPUT_SHAPE = (1, 38)

CLIENT_TYPES = ["user", "bank", "third_party"]

# Index associated with each client type (same order as in CLIENT_TYPES)
INPUT_INDEXES = {
    "user": 0,
    "bank": 1,
    "third_party": 2,
}

APPROVAL_INPUT_SLICES = {
    "user": slice(0, 36),  # First position: start from 0
    "bank": slice(36, 37),  # Second position: start from n_feature_user
    "third_party": slice(37, 39),  # Third position: start from n_feature_user + n_feature_bank
}

EXPLAIN_INPUT_SLICES = {
    "user": slice(0, 36),  # First position: start from 0
    "bank": slice(36, 37),  # Second position: start from n_feature_user
    "third_party": slice(37, 38),  # Third position: start from n_feature_user + n_feature_bank
}

# Fix column order for pre-processing steps
USER_COLUMNS = [
    "Own_car", "Own_property", "Mobile_phone", "Num_children", "Household_size",
    "Total_income", "Age", "Income_type", "Education_type", "Family_status", "Housing_type",
    "Occupation_type",
]
BANK_COLUMNS = ["Account_age"]
APPROVAL_THIRD_PARTY_COLUMNS = ["Years_employed", "Employed"]
EXPLAIN_THIRD_PARTY_COLUMNS = ["Employed"]
# Load the dataset when this module is imported: the min/max values and the choices defined
# further down in this module are all read from it
_data = pandas.read_csv(DATA_PATH, encoding="utf-8")
def get_min_max(data, column):
    """Return the integer bounds of a column, keyed for use as Gradio keyword arguments.

    Args:
        data: The table to read from, indexed by column name.
        column: The name of the column whose bounds are wanted.

    Returns:
        A dict with exactly two entries, "minimum" and "maximum", holding the column's smallest
        and largest values converted with `int`.
    """
    values = data[column]
    lowest = int(values.min())
    highest = int(values.max())
    return dict(minimum=lowest, maximum=highest)
# App data min and max values, each one a {"minimum": ..., "maximum": ...} dict read from the
# dataset
CHILDREN_MIN_MAX = get_min_max(_data, "Num_children")
FAMILY_MIN_MAX = get_min_max(_data, "Household_size")
INCOME_MIN_MAX = get_min_max(_data, "Total_income")
AGE_MIN_MAX = get_min_max(_data, "Age")
ACCOUNT_MIN_MAX = get_min_max(_data, "Account_age")
EMPLOYED_MIN_MAX = get_min_max(_data, "Years_employed")


def _distinct_values(column):
    """Return the distinct values found in a column of the dataset, as a list."""
    return list(_data[column].unique())


# App data choices, each one the list of distinct values of a dataset column
INCOME_TYPES = _distinct_values("Income_type")
EDUCATION_TYPES = _distinct_values("Education_type")
FAMILY_STATUS = _distinct_values("Family_status")
HOUSING_TYPES = _distinct_values("Housing_type")
OCCUPATION_TYPES = _distinct_values("Occupation_type")