Spaces:

EPITA-PFEE
/

private-fhe-fraud-detection

Running

App Files Files Community

private-fhe-fraud-detection / models /fhe_model.py

Tenefix

Upload 5 files

34c497e verified 7 months ago

raw

history blame contribute delete

1.82 kB

	"""
	Module for training and deploying an FHE-enabled
	Random Forest model using Concrete ML.
	"""

	import os
	import shutil

	import joblib
	import pandas as pd
	from concrete.ml.deployment import FHEModelDev
	from concrete.ml.sklearn.rf import RandomForestClassifier
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler

	# Load the data (first 100,000 rows only).
	# The path is resolved against the current working directory, so the script
	# must be launched from the directory that contains `dataset/`.
	DATA_PATH = os.path.join(os.path.abspath(os.getcwd()), "dataset", "card_transdata.csv")
	df = pd.read_csv(DATA_PATH, nrows=100000)  # Limit to 100,000 rows

	# Drop every row that has at least one missing value.
	if df.isnull().sum().any():
	    df = df.dropna()

	# Handle class imbalance by undersampling to the size of the smaller class.
	fraud = df[df["fraud"] == 1]
	_legit = df[df["fraud"] == 0]
	if fraud.empty or _legit.empty:
	    # A balanced set cannot be built from a single class; fail with a clear
	    # message instead of an obscure error further down the pipeline.
	    raise ValueError(
	        "Both fraud and non-fraud rows are required to build a balanced training set."
	    )
	_per_class = min(len(fraud), len(_legit))
	if len(fraud) > _per_class:
	    # Only resample the fraud rows when they are the majority, so the usual
	    # case (fraud is the minority) keeps every fraud row in its original order.
	    fraud = fraud.sample(n=_per_class, random_state=42)
	# Sampling without replacement cannot exceed the population size, hence the
	# min() above; the fixed seed keeps the selection reproducible.
	non_fraud = _legit.sample(n=_per_class, random_state=42)
	balanced_df = pd.concat([fraud, non_fraud])

	# Separate features and target
	X = balanced_df.drop(columns=["fraud"])
	y = balanced_df["fraud"].astype(int)

	# Split into training and validation sets (80/20), stratified on the target
	# so both sets keep the same class proportions; the seed makes it reproducible.
	X_train, X_val, y_train, y_val = train_test_split(
	    X, y, test_size=0.2, random_state=42, stratify=y
	)

	# Preprocessing: scale the data.
	# The scaler is fitted on the training split only and then applied to the
	# validation split, so validation statistics do not influence the scaling.
	scaler = StandardScaler()
	X_train_scaled = scaler.fit_transform(X_train)
	# NOTE(review): X_val_scaled / y_val do not appear to be used later in this
	# script; confirm whether an evaluation step is intended.
	X_val_scaled = scaler.transform(X_val)

	# Save the scaler for later use.
	# The path is resolved against the current working directory; create the
	# target directory first so the dump does not fail when `models/` is missing.
	SCALER_PATH = os.path.join(os.path.abspath(os.getcwd()), "models", "scaler.pkl")
	os.makedirs(os.path.dirname(SCALER_PATH), exist_ok=True)
	joblib.dump(scaler, SCALER_PATH)

	# Train the Random Forest model with Concrete ML
	model = RandomForestClassifier(n_estimators=100, random_state=42)
	model.fit(X_train_scaled, y_train)

	# Compile the model for homomorphic encryption, using the scaled training
	# data as the representative input set.
	model.compile(X_train_scaled)

	# Save the model and necessary files for client and server.
	# The path is resolved against the current working directory.
	FHE_DIRECTORY = os.path.join(os.path.abspath(os.getcwd()), "models", "fhe_files")
	# FHEModelDev.save() refuses to write into a non-empty directory (see the
	# Concrete ML deployment docs), so a previous export would make a re-run
	# fail after all the training work. Remove the stale export only now, once
	# training and compilation have succeeded.
	if os.path.isdir(FHE_DIRECTORY):
	    shutil.rmtree(FHE_DIRECTORY)
	dev = FHEModelDev(path_dir=FHE_DIRECTORY, model=model)
	dev.save()

	print("Model trained, compiled, and saved.")