Spaces:
Sleeping
Sleeping
File size: 3,391 Bytes
8b8d9a9 3d7365c 8b8d9a9 10af7a9 8b8d9a9 2d520be 8b8d9a9 3d7365c 8b8d9a9 f8dd345 8b8d9a9 32210fd 8b8d9a9 3d7365c 8b8d9a9 32210fd f8dd345 8b8d9a9 2d520be 8b8d9a9 bcba520 3d7365c bcba520 8b8d9a9 f8dd345 8b8d9a9 d0bd503 32210fd 8b8d9a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import gradio as gr
import pandas as pd
import numpy as np
import dare
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
# --- Data preparation ----------------------------------------------------
# Load the Parkinson's dataset and normalize column names: the raw headers
# contain ':' (e.g. "MDVP:Fo(Hz)"), which is replaced with '_'.
data = pd.read_csv("parkinsons.data")
data.columns = [col.replace(':', '_') for col in data.columns]

# Feature matrix (everything except the subject name and the target) and
# the "status" target vector.
X = data.drop(columns=["name", "status"]).values
y = data["status"].values

# Hold out 20% as a stratified test split; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
# Function to train model, delete a sample, and retrain
def run_dare_demo(delete_index=25):
    """Train a DaRE forest, unlearn one training sample, and compare
    against retraining from scratch.

    Parameters
    ----------
    delete_index : int, default 25
        Index (into the training set) of the sample to unlearn.

    Returns
    -------
    str
        Human-readable log of timings and macro F1-scores for the
        initial model, the model after unlearning, and a model retrained
        from scratch without the deleted sample.
    """
    logs = ""
    # Timings are filled in only if their phase succeeds; the final
    # comparison is guarded on both being available (the original code
    # raised NameError here whenever either try-block failed).
    unlearning_time = None
    retrain_time = None

    # --- Initial training -------------------------------------------------
    model = dare.Forest(n_estimators=50, max_depth=3, random_state=42)
    start = time.perf_counter()
    model.fit(X_train, y_train)
    train_time = time.perf_counter() - start

    y_pred = model.predict(X_test)
    f1_before = f1_score(y_test, y_pred, average='macro')
    logs += f"✅ Initial training completed in {train_time:.4f} seconds\n"
    logs += f"🎯 F1-score before unlearning: {f1_before:.4f}\n"

    # --- Unlearning: delete one training sample in place ------------------
    try:
        start_del = time.perf_counter()
        model.delete(delete_index)
        unlearning_time = time.perf_counter() - start_del

        # Predict on X_test exactly as the initial model did (the model was
        # fit on the raw arrays, so no dtype cast is needed here).
        y_pred_after = model.predict(X_test)
        f1_after = f1_score(y_test, y_pred_after, average='macro')
        logs += f"\n🧽 Deleted index {delete_index} in {unlearning_time:.5f} seconds\n"
        logs += f"🎯 F1-score after unlearning: {f1_after:.4f}\n"
    except Exception as e:
        logs += f"\n⚠️ Error during unlearning: {str(e)}\n"

    # --- Retrain from scratch without the deleted sample ------------------
    try:
        X_retrain = np.delete(X_train, delete_index, axis=0)
        y_retrain = np.delete(y_train, delete_index, axis=0)
        retrain_model = dare.Forest(n_estimators=50, max_depth=3, random_state=42)
        start_retrain = time.perf_counter()
        retrain_model.fit(X_retrain, y_retrain)
        retrain_time = time.perf_counter() - start_retrain

        y_pred_retrain = retrain_model.predict(X_test)
        f1_retrain = f1_score(y_test, y_pred_retrain, average='macro')
        logs += f"\n🔁 Retraining completed in {retrain_time:.5f} seconds (without index {delete_index})\n"
        logs += f"🎯 F1-score after retraining: {f1_retrain:.4f}\n"
    except Exception as e:
        logs += f"\n⚠️ Error during retraining: {str(e)}\n"

    # Compare timings only when both phases completed and the unlearning
    # timer is non-zero (avoids NameError / ZeroDivisionError).
    if unlearning_time and retrain_time is not None:
        logs += f"\n⏳ Retraining takes {retrain_time/unlearning_time:.2f} times longer than unlearning.\n"
    return logs
# --- Gradio UI -----------------------------------------------------------
# One slider selects which training sample to unlearn; the demo's log text
# is shown as plain-text output.
index_slider = gr.Slider(
    0,
    len(X_train) - 1,
    value=25,
    step=1,
    label="Data Point Index to Unlearn",
)
iface = gr.Interface(
    fn=run_dare_demo,
    inputs=index_slider,
    outputs="text",
    title="DaRE: Unlearning Demo on Parkinson's Dataset",
    description=(
        "This demo shows how to train a DaRE forest, unlearn a data point, "
        "and retrain for comparison using the Parkinson's dataset."
    ),
)
if __name__ == "__main__":
    # Launch the Gradio web app only when executed as a script.
    # (Removed a stray trailing "|" token that made this line a syntax error.)
    iface.launch()