# Page header residue: "AashishNKumar's picture"
# Commit message: add objective function for binary classification
# Commit: 449b6cf
import gradio as gr
import numpy as np
import xgboost as xgb
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import (
accuracy_score,
precision_score,
recall_score,
f1_score,
confusion_matrix,
)
from ucimlrepo import fetch_ucirepo
import os
# Path used to cache the trained model between runs.
MODEL_PATH = "heart_disease_model.json"

# Load the UCI Heart Disease dataset (repository id 45).
heart_disease = fetch_ucirepo(id=45)
X = heart_disease.data.features
y = np.ravel(heart_disease.data.targets)

# Hold out the test set BEFORE fitting any transformer or resampling.
# Fitting the imputer/scaler on all rows, or running SMOTE before the split,
# leaks test-set information into training and puts synthetic samples into the
# test set, which inflates every reported metric.
X_train_raw, X_test_raw, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Preprocessing pipeline: statistics are learned from the training split only
# and then re-applied unchanged to the test split (and to user input later).
imputer = SimpleImputer(strategy="mean")
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(imputer.fit_transform(X_train_raw))
X_test = scaler.transform(imputer.transform(X_test_raw))

# Balance the classes of the training split only; the test split keeps its
# real samples and its natural class distribution.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_scaled, y_train_raw)
X_train, y_train = X_resampled, y_resampled
# Make sure a serialized model exists on disk, training one if necessary,
# then load it as a raw Booster (the form the rest of the script predicts with).
if not os.path.exists(MODEL_PATH):
    # No cached model yet: run an exhaustive grid search and persist the winner.
    # NOTE(review): the grid pins a binary objective while eval_metric below is
    # the multi-class log loss -- confirm which one the target encoding needs.
    param_grid = {
        "objective": ["binary:logistic"],  # For binary classification
        "max_depth": [4, 5, 6],
        "learning_rate": [0.01, 0.05, 0.1],
        "n_estimators": [100, 200, 300],
        "subsample": [0.8, 1.0],
        "colsample_bytree": [0.8, 1.0],
        "gamma": [0, 1, 5],
        "lambda": [1, 2, 3],
        "alpha": [0, 1],
    }
    model = xgb.XGBClassifier(use_label_encoder=False, eval_metric="mlogloss")
    grid_search = GridSearchCV(
        estimator=model,
        param_grid=param_grid,
        scoring="accuracy",
        cv=5,
        verbose=1,
    )
    grid_search.fit(X_train, y_train)

    # Persist the best estimator found by the search.
    best_model = grid_search.best_estimator_
    best_model.save_model(MODEL_PATH)

# Both paths end the same way: read the model file back into a Booster.
model = xgb.Booster()
model.load_model(MODEL_PATH)
# Evaluate the model on the test split.
X_test_dmatrix = xgb.DMatrix(X_test)

# Booster.predict yields probabilities, not class labels: a 1-D vector of
# positive-class probabilities for a binary objective, or an
# (n_samples, n_classes) matrix for a multi-class probability objective.
# The sklearn metrics below need discrete labels, so convert first.
y_prob = np.asarray(model.predict(X_test_dmatrix))
if y_prob.ndim == 2:
    y_pred = np.argmax(y_prob, axis=1)
else:
    y_pred = (y_prob > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average="weighted")
recall = recall_score(y_test, y_pred, average="weighted")
f1 = f1_score(y_test, y_pred, average="weighted")
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print("Confusion Matrix:")
print(conf_matrix)
# Define prediction function
def predict(
    age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal
):
    """Return the predicted class label for one patient.

    The arguments arrive from the Gradio inputs in the same order as the
    training feature columns. Radio-button values (sex, cp, fbs, restecg,
    exang, slope, thal) arrive as strings and are converted to integers; the
    remaining values are passed through as numbers.

    Returns:
        The predicted class as an ``int``.

    Raises:
        TypeError, ValueError: If a radio value is missing or not an integer
            string (raised by ``int``).
    """
    # Convert string values to numeric where needed
    sex, cp, fbs, restecg, exang, slope, thal = (
        int(value) for value in (sex, cp, fbs, restecg, exang, slope, thal)
    )

    # Single-row feature matrix, columns in training order.
    features = np.array(
        [
            [
                age,
                sex,
                cp,
                trestbps,
                chol,
                fbs,
                restecg,
                thalach,
                exang,
                oldpeak,
                slope,
                ca,
                thal,
            ]
        ]
    )

    # Apply the same imputation and scaling that were fitted on the training data.
    features = scaler.transform(imputer.transform(features))

    # Booster.predict returns probabilities, not labels. Truncating with int()
    # would always give 0 for a binary model and fail on a per-class row, so
    # map the probabilities to a class explicitly.
    prediction = np.asarray(model.predict(xgb.DMatrix(features)))
    if prediction.ndim == 2:
        # One probability per class: pick the most likely class.
        return int(np.argmax(prediction[0]))
    # Single positive-class probability: threshold at 0.5.
    return int(prediction[0] > 0.5)
# Gradio interface
# Inputs are listed in the same order as the parameters of predict().
# The categorical choices must use the same integer codes as the UCI Heart
# Disease data the model is trained on: cp is coded 1-4, slope 1-3 and thal
# 3/6/7 there, so offering 0-based codes would feed the model values it never
# saw during training.
feature_inputs = [
    gr.Number(label="Age (years)"),
    gr.Radio(label="Sex", choices=["0", "1"], type="value"),  # Male: 1, Female: 0
    # 1: typical angina, 2: atypical angina, 3: non-anginal pain, 4: asymptomatic
    gr.Radio(label="Chest Pain Type (cp)", choices=["1", "2", "3", "4"], type="value"),
    gr.Number(label="Resting Blood Pressure (mm Hg)"),
    gr.Number(label="Serum Cholestoral (mg/dl)"),
    gr.Radio(
        label="Fasting Blood Sugar > 120 mg/dl (fbs)", choices=["0", "1"], type="value"
    ),
    gr.Radio(
        label="Resting ECG Results (restecg)", choices=["0", "1", "2"], type="value"
    ),
    gr.Number(label="Maximum Heart Rate Achieved (thalach)"),
    gr.Radio(label="Exercise Induced Angina (exang)", choices=["0", "1"], type="value"),
    gr.Number(label="ST Depression Induced by Exercise (oldpeak)"),
    # 1: upsloping, 2: flat, 3: downsloping
    gr.Radio(
        label="Slope of the Peak Exercise ST Segment (slope)",
        choices=["1", "2", "3"],
        type="value",
    ),
    gr.Number(label="Number of Major Vessels Colored by Fluoroscopy (ca)"),
    # 3: normal, 6: fixed defect, 7: reversible defect
    gr.Radio(label="Thalassemia (thal)", choices=["3", "6", "7"], type="value"),
]

interface = gr.Interface(
    fn=predict,
    inputs=feature_inputs,
    outputs="label",
    title="Heart Disease Prediction",
    description=(
        "Predicts heart disease based on patient information. "
        "Provide the required features to get a diagnosis prediction."
    ),
)

# Start the web UI only when the file is run as a script.
if __name__ == "__main__":
    interface.launch()