MU_Usecase / app.py
NLPV's picture
Update app.py
2d520be verified
raw
history blame
3.27 kB
import gradio as gr
import pandas as pd
import numpy as np
import dare
import time
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
# Load the dataset from "parkinsons.data" (read relative to the working
# directory) and replace every ':' in the column headers with '_'.
data = pd.read_csv("parkinsons.data")
data.columns = [header.replace(':', '_') for header in data.columns]

# "status" is the prediction target; the features are every remaining column
# except the "name" column.
y = data["status"].values
X = data.drop(columns=["name", "status"]).values

# Hold out 15% of the rows for evaluation. The split is stratified on the
# label and seeded so that it is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    stratify=y,
    random_state=42,
)
def _timed(action, *args):
    """Call ``action(*args)`` and return the elapsed wall-clock seconds."""
    started = time.perf_counter()
    action(*args)
    return time.perf_counter() - started


def _macro_f1(model, features, labels):
    """Return the macro-averaged F1 score of ``model`` on ``features``/``labels``."""
    return f1_score(labels, model.predict(features), average='macro')


def run_dare_demo(delete_index=25):
    """Train a DaRE forest, unlearn one training row, and retrain for comparison.

    Three steps are performed, each reported in the returned log:

    1. Fit a ``dare.Forest`` (50 estimators, depth 3, seed 42) on the
       module-level ``X_train``/``y_train`` and score it on ``X_test``.
    2. Call ``model.delete(delete_index)`` on that fitted forest and score it
       again.
    3. Fit a fresh forest with the same settings on ``X_train``/``y_train``
       with row ``delete_index`` removed, and score it.

    Every score is the macro-averaged F1 on the held-out test split, and every
    duration is measured with ``time.perf_counter``.

    Args:
        delete_index: Row position in ``X_train`` to unlearn. Anything that
            ``int()`` accepts is allowed (the UI slider may deliver a float).

    Returns:
        A multi-line, human-readable log string. Failures in step 2 or step 3
        are reported inside the log rather than raised, so one failing step
        does not hide the result of the other. An index that is not a number
        or lies outside ``X_train`` yields a single error line instead.
    """
    # Normalise the UI value first: np.delete rejects non-integer indices, and
    # a clear message beats two separate library errors further down.
    try:
        delete_index = int(delete_index)
    except (TypeError, ValueError, OverflowError):
        return f"\n⚠️ Error during unlearning: invalid index {delete_index!r}\n"
    if not 0 <= delete_index < len(X_train):
        return (
            f"\n⚠️ Error during unlearning: index {delete_index} is outside "
            f"0..{len(X_train) - 1}\n"
        )

    # The original only cast the post-deletion prediction input to float32.
    # Casting once keeps all three evaluations on the exact same input.
    X_eval = X_test.astype(np.float32)
    lines = []

    # Step 1: initial training. Errors here propagate to the caller, as before.
    model = dare.Forest(n_estimators=50, max_depth=3, random_state=42)
    train_time = _timed(model.fit, X_train, y_train)
    f1_before = _macro_f1(model, X_eval, y_test)
    lines.append(f"✅ Initial training completed in {train_time:.4f} seconds\n")
    lines.append(f"🎯 F1-score before unlearning: {f1_before:.4f}\n")

    # Step 2: unlearn the selected row from the already-fitted forest.
    # Broad catch is deliberate: this is the UI boundary and the demo should
    # still show the retraining comparison if unlearning fails.
    try:
        delete_time = _timed(model.delete, delete_index)
        f1_after = _macro_f1(model, X_eval, y_test)
    except Exception as e:
        lines.append(f"\n⚠️ Error during unlearning: {str(e)}\n")
    else:
        lines.append(f"\n🧽 Deleted index {delete_index} in {delete_time:.5f} seconds\n")
        lines.append(f"🎯 F1-score after unlearning: {f1_after:.4f}\n")

    # Step 3: baseline - retrain from scratch without that same row position.
    try:
        X_retrain = np.delete(X_train, delete_index, axis=0)
        y_retrain = np.delete(y_train, delete_index, axis=0)
        retrain_model = dare.Forest(n_estimators=50, max_depth=3, random_state=42)
        retrain_time = _timed(retrain_model.fit, X_retrain, y_retrain)
        f1_retrain = _macro_f1(retrain_model, X_eval, y_test)
    except Exception as e:
        lines.append(f"\n⚠️ Error during retraining: {str(e)}\n")
    else:
        lines.append(
            f"\n🔁 Retraining completed in {retrain_time:.5f} seconds "
            f"(without index {delete_index})\n"
        )
        lines.append(f"🎯 F1-score after retraining: {f1_retrain:.4f}\n")

    return "".join(lines)
# Gradio Interface
# The slider covers every row position of X_train (0 .. len(X_train) - 1) in
# whole-number steps and starts at 25.
index_slider = gr.Slider(
    minimum=0,
    maximum=len(X_train) - 1,
    value=25,
    step=1,
    label="Data Point Index to Unlearn",
)

# Wire the slider to run_dare_demo and show the returned log as plain text.
iface = gr.Interface(
    fn=run_dare_demo,
    inputs=index_slider,
    outputs="text",
    title="DaRE: Unlearning Demo on Parkinson's Dataset",
    description="This demo shows how to train a DaRE forest, unlearn a data point, and retrain for comparison using the Parkinson's dataset.",
)

# Launch the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()