"""Gradio demo: unlearn one training sample from a DaRE forest.

The script loads ``parkinsons.data``, trains a DaRE forest on a stratified
80/20 split, deletes one training sample from the fitted model, and compares
the result (macro F1 and wall-clock time) with retraining from scratch on
the training data without that sample.
"""

import time

import dare
import gradio as gr
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

# Load and prepare data
data = pd.read_csv("parkinsons.data")
data.columns = data.columns.str.replace(':', '_')
X = data.drop(columns=["name", "status"]).values
y = data["status"].values

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)


def _new_forest():
    """Return an unfitted DaRE forest with the demo's fixed hyperparameters."""
    return dare.Forest(n_estimators=50, max_depth=3, random_state=42)


def _macro_f1(model, X_eval):
    """Return the macro-averaged F1 of *model* on *X_eval* against ``y_test``."""
    return f1_score(y_test, model.predict(X_eval), average='macro')


def run_dare_demo(delete_index: int = 25) -> str:
    """Train a DaRE forest, unlearn one sample, and retrain for comparison.

    Args:
        delete_index: Position (row) in ``X_train`` of the sample to remove.
            Coerced to ``int`` because the UI slider may deliver the value
            as a float.

    Returns:
        A multi-line, human-readable log with training / unlearning /
        retraining timings and the macro F1 score after each stage. Failures
        during unlearning or retraining are reported in the log instead of
        being raised.
    """
    delete_index = int(delete_index)
    logs = []

    # Train initial model
    model = _new_forest()
    start = time.perf_counter()
    model.fit(X_train, y_train)
    train_time = time.perf_counter() - start

    f1_before = _macro_f1(model, X_test)
    logs.append(f"✅ Initial training completed in {train_time:.4f} seconds\n")
    logs.append(f"🎯 F1-score before unlearning: {f1_before:.4f}\n")

    # Timings stay None when the corresponding stage fails, so the final
    # comparison can be skipped instead of raising NameError.
    unlearning_time = None
    retrain_time = None

    # Delete a data point
    try:
        start_del = time.perf_counter()
        model.delete(delete_index)
        unlearning_time = time.perf_counter() - start_del

        f1_after = _macro_f1(model, X_test.astype(np.float32))
        logs.append(
            f"\n🧽 Deleted index {delete_index} in {unlearning_time:.5f} seconds\n"
        )
        logs.append(f"🎯 F1-score after unlearning: {f1_after:.4f}\n")
    except Exception as e:  # UI boundary: report the failure in the log
        logs.append(f"\n⚠️ Error during unlearning: {str(e)}\n")

    # Retrain from scratch for comparison
    try:
        X_retrain = np.delete(X_train, delete_index, axis=0)
        y_retrain = np.delete(y_train, delete_index, axis=0)

        retrain_model = _new_forest()
        start_retrain = time.perf_counter()
        retrain_model.fit(X_retrain, y_retrain)
        retrain_time = time.perf_counter() - start_retrain

        f1_retrain = _macro_f1(retrain_model, X_test)
        logs.append(
            f"\n🔁 Retraining completed in {retrain_time:.5f} seconds "
            f"(without index {delete_index})\n"
        )
        logs.append(f"🎯 F1-score after retraining: {f1_retrain:.4f}\n")
    except Exception as e:  # UI boundary: report the failure in the log
        logs.append(f"\n⚠️ Error during retraining: {str(e)}\n")

    # Only compare timings when both stages succeeded and the divisor is
    # non-zero.
    if unlearning_time and retrain_time is not None:
        logs.append(
            f"\n⏳ Retraining takes {retrain_time / unlearning_time:.2f} "
            f"times longer than unlearning.\n"
        )
    else:
        logs.append(
            "\n⚠️ Speed comparison unavailable: unlearning or retraining "
            "did not produce a usable timing.\n"
        )

    return "".join(logs)


# Gradio Interface
iface = gr.Interface(
    fn=run_dare_demo,
    inputs=gr.Slider(
        0,
        len(X_train) - 1,
        value=25,
        step=1,
        label="Data Point Index to Unlearn",
    ),
    outputs="text",
    title="DaRE: Unlearning Demo on Parkinson's Dataset",
    description="This demo shows how to train a DaRE forest, unlearn a data point, and retrain for comparison using the Parkinson's dataset.",
)

if __name__ == "__main__":
    iface.launch()