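"""Gradio demo: machine unlearning with a DaRE forest on the Parkinson's dataset.

Trains a DaRE forest, unlearns one training instance, retrains from scratch for
comparison, and reports F1-scores and wall-clock timings for each step.
"""
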
import gradio as gr
import pandas as pd
import numpy as np
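# dare is the DaRE ("Data Removal-Enabled") forest library, published on PyPI
# as dare-rf; it supports deleting individual training instances from a
# fitted forest without retraining it from scratch.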
import dare
import time
 
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
 
# Load and prepare data
data = pd.read_csv("parkinsons.data")
data.columns = data.columns.str.replace(':', '_')
# Cast features to float32 once up front so every fit/predict call below sees
# the same dtype (the original code cast only one predict call).
X = data.drop(columns=["name", "status"]).values.astype(np.float32)
y = data["status"].values
 
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)
 
# Function to train model, delete a sample, and retrain
def run_dare_demo(delete_index=25):
    logs = ""
    unlearning_time = None  # set below if deletion succeeds
    retrain_time = None     # set below if retraining succeeds

    # Train initial model
    model = dare.Forest(n_estimators=50, max_depth=3, random_state=42)
    start = time.perf_counter()
    model.fit(X_train, y_train)
    train_time = time.perf_counter() - start

    y_pred = model.predict(X_test)
    f1_before = f1_score(y_test, y_pred, average='macro')

    logs += f"✅ Initial training completed in {train_time:.4f} seconds\n"
    logs += f"🎯 F1-score before unlearning: {f1_before:.4f}\n"
 
    # Delete a data point
    try:
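        # DaRE caches statistics in each node and keeps training data at the
        # leaves, so deleting one instance typically retrains only the affected
        # subtrees rather than the entire forest.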
        start_del = time.perf_counter()
        model.delete(delete_index)
        unlearning_time = time.perf_counter() - start_del
        y_pred_after = model.predict(X_test)
        f1_after = f1_score(y_test, y_pred_after, average='macro')

        logs += f"\n🧽 Deleted index {delete_index} in {unlearning_time:.5f} seconds\n"
        logs += f"🎯 F1-score after unlearning: {f1_after:.4f}\n"
    except Exception as e:
        logs += f"\n⚠️ Error during unlearning: {e}\n"
 
    # Retrain from scratch for comparison
    try:
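        # Naive baseline: drop the same training row and fit a brand-new forest,
        # giving the cost of full retraining to compare against unlearning.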
        X_retrain = np.delete(X_train, delete_index, axis=0)
        y_retrain = np.delete(y_train, delete_index, axis=0)

        retrain_model = dare.Forest(n_estimators=50, max_depth=3, random_state=42)
        start_retrain = time.perf_counter()
        retrain_model.fit(X_retrain, y_retrain)
        retrain_time = time.perf_counter() - start_retrain

        y_pred_retrain = retrain_model.predict(X_test)
        f1_retrain = f1_score(y_test, y_pred_retrain, average='macro')

        logs += f"\n🔁 Retraining completed in {retrain_time:.5f} seconds (without index {delete_index})\n"
        logs += f"🎯 F1-score after retraining: {f1_retrain:.4f}\n"
    except Exception as e:
        logs += f"\n⚠️ Error during retraining: {e}\n"
 
    # Compare timings only if both unlearning and retraining succeeded
    if unlearning_time is not None and retrain_time is not None:
        logs += f"\n⏳ Retraining takes {retrain_time / unlearning_time:.2f} times longer than unlearning.\n"

    return logs
 
# Gradio Interface
iface = gr.Interface(
    fn=run_dare_demo,
    inputs=gr.Slider(0, len(X_train)-1, value=25, step=1, label="Data Point Index to Unlearn"),
    outputs="text",
    title="DaRE: Unlearning Demo on Parkinson's Dataset",
    description="This demo trains a DaRE forest on the Parkinson's dataset, unlearns a single data point, and retrains from scratch for comparison."
)
 
if __name__ == "__main__":
    iface.launch()