Spaces:

AashishNKumar
/

heart-disease-predictor

Sleeping

File size: 4,907 Bytes

import gradio as gr
import numpy as np
import xgboost as xgb
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
)
from ucimlrepo import fetch_ucirepo
import os

# Paths for saving/loading the model
MODEL_PATH = "heart_disease_model.json"

# Load and preprocess the data
heart_disease = fetch_ucirepo(id=45)
X = heart_disease.data.features
y = np.ravel(heart_disease.data.targets)

# Preprocessing pipeline
imputer = SimpleImputer(strategy="mean")
X = imputer.fit_transform(X)
scaler = StandardScaler()
X = scaler.fit_transform(X)
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42, stratify=y_resampled
)

# Train or load the model
if os.path.exists(MODEL_PATH):
    # Load pre-trained model
    model = xgb.Booster()
    model.load_model(MODEL_PATH)
else:
    # Hyperparameter tuning
    param_grid = {
        "objective": ["binary:logistic"],  # For binary classification
        "max_depth": [4, 5, 6],
        "learning_rate": [0.01, 0.05, 0.1],
        "n_estimators": [100, 200, 300],
        "subsample": [0.8, 1.0],
        "colsample_bytree": [0.8, 1.0],
        "gamma": [0, 1, 5],
        "lambda": [1, 2, 3],
        "alpha": [0, 1],
    }

    model = xgb.XGBClassifier(use_label_encoder=False, eval_metric="mlogloss")
    grid_search = GridSearchCV(
        estimator=model, param_grid=param_grid, scoring="accuracy", cv=5, verbose=1
    )
    grid_search.fit(X_train, y_train)

    # Best model
    best_model = grid_search.best_estimator_
    best_model.save_model(MODEL_PATH)

# Load the best model
model = xgb.Booster()
model.load_model(MODEL_PATH)

# Evaluate model
X_test_dmatrix = xgb.DMatrix(X_test)
y_pred = model.predict(X_test_dmatrix)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average="weighted")
recall = recall_score(y_test, y_pred, average="weighted")
f1 = f1_score(y_test, y_pred, average="weighted")
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print("Confusion Matrix:")
print(conf_matrix)


# Define prediction function
def predict(
    age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal
):
    # Convert string values to numeric where needed
    sex = int(sex)
    cp = int(cp)
    fbs = int(fbs)
    restecg = int(restecg)
    exang = int(exang)
    slope = int(slope)
    thal = int(thal)

    # Combine inputs into a single feature list
    features = np.array(
        [
            age,
            sex,
            cp,
            trestbps,
            chol,
            fbs,
            restecg,
            thalach,
            exang,
            oldpeak,
            slope,
            ca,
            thal,
        ]
    ).reshape(1, -1)

    # Preprocess the inputs
    features = scaler.transform(imputer.transform(features))

    # Predict using the trained model
    dmatrix = xgb.DMatrix(features)
    prediction = model.predict(dmatrix)
    return int(prediction[0])


# Gradio interface
feature_inputs = [
    gr.Number(label="Age (years)"),
    gr.Radio(label="Sex", choices=["0", "1"], type="value"),  # Male: 1, Female: 0
    gr.Radio(label="Chest Pain Type (cp)", choices=["0", "1", "2", "3"], type="value"),
    gr.Number(label="Resting Blood Pressure (mm Hg)"),
    gr.Number(label="Serum Cholestoral (mg/dl)"),
    gr.Radio(
        label="Fasting Blood Sugar > 120 mg/dl (fbs)", choices=["0", "1"], type="value"
    ),
    gr.Radio(
        label="Resting ECG Results (restecg)", choices=["0", "1", "2"], type="value"
    ),
    gr.Number(label="Maximum Heart Rate Achieved (thalach)"),
    gr.Radio(label="Exercise Induced Angina (exang)", choices=["0", "1"], type="value"),
    gr.Number(label="ST Depression Induced by Exercise (oldpeak)"),
    gr.Radio(
        label="Slope of the Peak Exercise ST Segment (slope)",
        choices=["0", "1", "2"],
        type="value",
    ),
    gr.Number(label="Number of Major Vessels Colored by Fluoroscopy (ca)"),
    gr.Radio(label="Thalassemia (thal)", choices=["0", "1", "2", "3"], type="value"),
]

interface = gr.Interface(
    fn=predict,
    inputs=feature_inputs,
    outputs="label",
    title="Heart Disease Prediction",
    description=(
        "Predicts heart disease based on patient information. "
        "Provide the required features to get a diagnosis prediction."
    ),
)

if __name__ == "__main__":
    interface.launch()