Spaces:

ppaihack
/

ZamaKlinikV2

Sleeping

App Files Files Community

ZamaKlinikV2 / app.py

lukalafaye

fix

ecec68f 10 months ago

raw

history blame

3.51 kB

	import streamlit as st
	import numpy as np
	import pandas as pd
	import seaborn as sns
	import matplotlib.pyplot as plt
	import joblib
	import os
	import shutil
	from sklearn.tree import DecisionTreeClassifier
	from sklearn.model_selection import train_test_split
	from sklearn.preprocessing import StandardScaler
	from sklearn.metrics import confusion_matrix
	from concrete.ml.sklearn.tree import DecisionTreeClassifier as FHEDecisionTreeClassifier
	from concrete.ml.deployment import FHEModelDev, FHEModelClient, FHEModelServer

	# Define the directory for FHE client/server files
	fhe_directory = '/tmp/fhe_client_server_files/'

	# Create the directory if it does not exist
	if not os.path.exists(fhe_directory):
	os.makedirs(fhe_directory)
	else:
	# If it exists, delete its contents
	shutil.rmtree(fhe_directory)
	os.makedirs(fhe_directory)

	# Load the data
	data = pd.read_csv('data/heart.xls')

	st.write("### Data Overview")
	st.write(data.head())
	data.info() # Show info in the Streamlit app

	# Correlation analysis
	data_corr = data.corr()
	plt.figure(figsize=(20, 20))
	sns.heatmap(data=data_corr, annot=True)
	st.write("### Correlation Heatmap")
	st.pyplot(plt)

	feature_value = np.array(data_corr['output'])
	for i in range(len(feature_value)):
	if feature_value[i] < 0:
	feature_value[i] = -feature_value[i]

	features_corr = pd.DataFrame(feature_value, index=data_corr['output'].index, columns=['correlation'])
	feature_sorted = features_corr.sort_values(by=['correlation'], ascending=False)
	feature_selected = feature_sorted.index

	st.write("### Selected Features")
	st.write(feature_selected)

	# Clean the data by selecting the most correlated features
	clean_data = data[feature_selected]

	# Prepare the dataset for training
	X = clean_data.iloc[:, 1:]
	Y = clean_data['output']
	x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.25, random_state=0)

	st.write("### Training Data Shape")
	st.write(f"X Train Shape: {x_train.shape}, Y Train Shape: {y_train.shape}")
	st.write(f"X Test Shape: {x_test.shape}, Y Test Shape: {y_test.shape}")

	# Feature scaling
	sc = StandardScaler()
	x_train = sc.fit_transform(x_train)
	x_test = sc.transform(x_test)

	# Train the model
	dt = DecisionTreeClassifier(criterion='entropy', max_depth=6)
	dt.fit(x_train, y_train)

	# Predict and evaluate
	y_pred = dt.predict(x_test)
	conf_mat = confusion_matrix(y_test, y_pred)
	accuracy = dt.score(x_test, y_test)

	st.write("### Confusion Matrix")
	st.write(conf_mat)
	st.write(f"### Accuracy: {round(accuracy * 100, 2)}%")

	# Save the model
	joblib.dump(dt, 'heart_disease_dt_model.pkl')

	# Convert the model for FHE
	fhe_compatible = FHEDecisionTreeClassifier.from_sklearn_model(dt, x_train, n_bits=10)
	fhe_compatible.compile(x_train)

	# Setup the server
	dev = FHEModelDev(path_dir=fhe_directory, model=fhe_compatible)
	dev.save()
	server = FHEModelServer(path_dir=fhe_directory)
	server.load()

	# Setup the client
	client = FHEModelClient(path_dir=fhe_directory, key_dir="/tmp/keys_client")
	serialized_evaluation_keys = client.get_serialized_evaluation_keys()

	# Load the dataset and select the relevant features for prediction
	sample_data = clean_data.iloc[0, 1:].values.reshape(1, -1) # First sample for prediction
	encrypted_data = client.quantize_encrypt_serialize(sample_data)

	# Run the server with encrypted data
	encrypted_result = server.run(encrypted_data, serialized_evaluation_keys)
	result = client.deserialize_decrypt_dequantize(encrypted_result)

	st.write("### Encrypted Prediction Result")
	st.write(result)