Spaces:

zama-fhe
/

encrypted_credit_scoring

Running

App Files Files Community

encrypted_credit_scoring / settings.py

romanbredehoft-zama

Rename third party and improve collaboration comments

316f8e9 12 months ago

raw

history blame

2.95 kB

	"All constants used in the project."

	from pathlib import Path
	import pandas

	# Root of the project: the directory that contains this settings module
	REPO_DIR = Path(__file__).parent

	# Main necessary directories, all located directly under the project root
	FHE_KEYS = REPO_DIR.joinpath(".fhe_keys")
	CLIENT_FILES = REPO_DIR.joinpath("client_files")
	SERVER_FILES = REPO_DIR.joinpath("server_files")

	# Deployment directory: the "model" sub-directory of "deployment_files"
	DEPLOYMENT_PATH = REPO_DIR.joinpath("deployment_files", "model")

	# Files in which the pre-processors are saved, one per input party
	PRE_PROCESSOR_USER_PATH = DEPLOYMENT_PATH.joinpath("pre_processor_user.pkl")
	PRE_PROCESSOR_BANK_PATH = DEPLOYMENT_PATH.joinpath("pre_processor_bank.pkl")
	PRE_PROCESSOR_CS_AGENCY_PATH = DEPLOYMENT_PATH.joinpath("pre_processor_cs_agency.pkl")

	# Create the necessary directories at import time (nothing happens if they already exist)
	for _directory in (FHE_KEYS, CLIENT_FILES, SERVER_FILES):
	    _directory.mkdir(exist_ok=True)

	# URL of the server
	SERVER_URL = "http://localhost:8000/"

	# Path to the data file (CSV)
	DATA_PATH = "data/data.csv"

	# The parties providing inputs; their position in this list is their input index
	CLIENT_TYPES = ["user", "bank", "cs_agency"]
	INPUT_INDEXES = {client: position for position, client in enumerate(CLIENT_TYPES)}

	# Development settings: number of processed features provided by each party
	_N_FEATURE_USER = 36
	_N_FEATURE_BANK = 1
	_N_FEATURE_CS_AGENCY = 2
	_N_FEATURE_TOTAL = _N_FEATURE_USER + _N_FEATURE_BANK + _N_FEATURE_CS_AGENCY

	# Shape of the processed input: a single row holding the features of all parties
	PROCESSED_INPUT_SHAPE = (1, _N_FEATURE_TOTAL)

	# Range of positions occupied by each party within the processed input
	INPUT_SLICES = {
	    # First position: start from 0
	    "user": slice(0, _N_FEATURE_USER),
	    # Second position: start from n_feature_user
	    "bank": slice(_N_FEATURE_USER, _N_FEATURE_USER + _N_FEATURE_BANK),
	    # Third position: start from n_feature_user + n_feature_bank
	    "cs_agency": slice(_N_FEATURE_USER + _N_FEATURE_BANK, _N_FEATURE_TOTAL),
	}

	# Column order expected by the pre-processing steps, for each party
	USER_COLUMNS = [
	    "Own_car",
	    "Own_property",
	    "Mobile_phone",
	    "Num_children",
	    "Household_size",
	    "Total_income",
	    "Age",
	    "Income_type",
	    "Education_type",
	    "Family_status",
	    "Housing_type",
	    "Occupation_type",
	]
	BANK_COLUMNS = [
	    "Account_age",
	]
	CS_AGENCY_COLUMNS = [
	    "Years_employed",
	    "Employed",
	]

	# Load the data set once, at import time. DATA_PATH is resolved against REPO_DIR (like every
	# other path of this module) so that importing the settings does not depend on the current
	# working directory. An absolute DATA_PATH is left untouched by the join
	_data = pandas.read_csv(REPO_DIR / DATA_PATH, encoding="utf-8")

	def get_min_max(data, column):
	    """Get integer min/max bounds of a column, to input them in Gradio's API as key arguments.

	    The minimum is rounded down and the maximum is rounded up, so that every value found in
	    the column lies within the returned bounds (a plain ``int()`` conversion truncates toward
	    zero and could exclude a fractional maximum or a negative fractional minimum).

	    Args:
	        data: Object indexable by column name whose columns expose ``min()`` and ``max()``,
	            such as a pandas DataFrame.
	        column: Name of the column to inspect.

	    Returns:
	        dict: ``{"minimum": <int>, "maximum": <int>}``.

	    Raises:
	        ValueError: If the column's minimum or maximum is NaN, as NaN cannot be converted to
	            an integer.
	    """
	    # Local import: keeps this function self-contained without touching the module's imports
	    import math

	    column_values = data[column]

	    # int() guarantees built-in integers whatever numeric scalar type min()/max() return
	    return {
	        "minimum": int(math.floor(column_values.min())),
	        "maximum": int(math.ceil(column_values.max())),
	    }

	# Min and max values of the app's numerical inputs, computed from the data set
	ACCOUNT_MIN_MAX = get_min_max(data=_data, column="Account_age")
	CHILDREN_MIN_MAX = get_min_max(data=_data, column="Num_children")
	INCOME_MIN_MAX = get_min_max(data=_data, column="Total_income")
	AGE_MIN_MAX = get_min_max(data=_data, column="Age")
	FAMILY_MIN_MAX = get_min_max(data=_data, column="Household_size")

	# Default values
	INCOME_VALUE = 12000
	AGE_VALUE = 30


	def _unique_values(column):
	    """Return the distinct values found in the given column of the data set, as a list."""
	    return list(_data[column].unique())


	# App data choices: the distinct values found in the data set for each column
	INCOME_TYPES = _unique_values("Income_type")
	OCCUPATION_TYPES = _unique_values("Occupation_type")
	HOUSING_TYPES = _unique_values("Housing_type")
	EDUCATION_TYPES = _unique_values("Education_type")
	FAMILY_STATUS = _unique_values("Family_status")
	# Labels of the Years_employed bins, listed in bin order
	YEARS_EMPLOYED_BINS = [
	    "0-2",
	    "2-5",
	    "5-8",
	    "8-11",
	    "11-18",
	    "18+",
	]

	# Position of each Years_employed bin label within the list above
	YEARS_EMPLOYED_BIN_NAME_TO_INDEX = {
	    label: position for position, label in enumerate(YEARS_EMPLOYED_BINS)
	}

	# Sanity check: the data set must contain as many distinct Years_employed values as there are
	# bins (only the counts are compared, not the labels themselves). The error is raised
	# explicitly because `assert` statements are removed when Python runs with -O, which would
	# silently disable this check; the exception type and message are kept unchanged
	if len(YEARS_EMPLOYED_BINS) != len(_data["Years_employed"].unique()):
	    raise AssertionError("Years_employed bins are not matching the expected list")