"""Heart-disease prediction demo.

At import time this script fetches UCI ML Repository dataset id 45, builds a
preprocessing pipeline (mean imputation, standard scaling, SMOTE
oversampling), trains an XGBoost classifier via grid search (or loads a
previously saved model from ``MODEL_PATH``), prints evaluation metrics on a
held-out test split and builds a Gradio interface around ``predict``.
"""

import os

import gradio as gr
import numpy as np
import xgboost as xgb
from imblearn.over_sampling import SMOTE
from sklearn.impute import SimpleImputer
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from ucimlrepo import fetch_ucirepo

# Path for saving/loading the model.
# NOTE: a model file produced by an earlier run is reused as-is; delete it to
# force retraining after changing the preprocessing or the parameter grid.
MODEL_PATH = "heart_disease_model.json"


def _to_class_labels(raw_scores):
    """Convert the output of ``xgb.Booster.predict`` into integer class labels.

    ``Booster.predict`` returns model scores rather than labels, so they must
    be decoded before being handed to classification metrics or to the UI.

    Args:
        raw_scores: Array-like returned by ``Booster.predict``. Either a 2-D
            array with one column of scores per class, or a 1-D array holding
            one value per row.

    Returns:
        A 1-D integer ``numpy`` array with one label per input row.
    """
    raw_scores = np.asarray(raw_scores)
    if raw_scores.ndim == 2:
        # One score per class: the label is the index of the highest score.
        # assumes class labels are 0..k-1 so the column index is the label
        # -- TODO confirm against the training target.
        return raw_scores.argmax(axis=1).astype(int)
    # 1-D output: rounding turns a positive-class probability into 0/1
    # (values <= 0.5 become 0) and leaves already-integral label values
    # unchanged.
    return np.rint(raw_scores).astype(int)


def _as_float(value):
    """Return ``value`` as a float, mapping an unanswered field to NaN.

    Args:
        value: A number, a numeric string, ``None`` or ``""``.

    Returns:
        ``float(value)``, or ``numpy.nan`` when ``value`` is ``None`` or ``""``.

    Raises:
        ValueError: If ``value`` is a string that is not numeric.
    """
    if value is None or value == "":
        return np.nan
    return float(value)


# Load the data
heart_disease = fetch_ucirepo(id=45)
X = heart_disease.data.features
y = np.ravel(heart_disease.data.targets)

# Train-test split.
# The split happens BEFORE any fitting or oversampling so that the test set
# contains only real, untouched samples; fitting the imputer/scaler or running
# SMOTE on the full data leaks test information into training and inflates
# the reported metrics.
X_train_raw, X_test_raw, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Preprocessing pipeline, fitted on the training split only
imputer = SimpleImputer(strategy="mean")
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(imputer.fit_transform(X_train_raw))
X_test = scaler.transform(imputer.transform(X_test_raw))

# Oversample minority classes in the training data only
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train_scaled, y_train_raw)

# Train the model unless a saved one already exists
if not os.path.exists(MODEL_PATH):
    # Hyperparameter tuning
    param_grid = {
        "objective": ["binary:logistic"],  # For binary classification
        "max_depth": [4, 5, 6],
        "learning_rate": [0.01, 0.05, 0.1],
        "n_estimators": [100, 200, 300],
        "subsample": [0.8, 1.0],
        "colsample_bytree": [0.8, 1.0],
        "gamma": [0, 1, 5],
        "lambda": [1, 2, 3],
        "alpha": [0, 1],
    }
    classifier = xgb.XGBClassifier(use_label_encoder=False, eval_metric="mlogloss")
    grid_search = GridSearchCV(
        estimator=classifier,
        param_grid=param_grid,
        scoring="accuracy",
        cv=5,
        verbose=1,
    )
    grid_search.fit(X_train, y_train)

    # Persist the best model so later runs can skip the grid search
    best_model = grid_search.best_estimator_
    best_model.save_model(MODEL_PATH)

# Both paths (freshly trained or pre-existing) serve predictions from the
# saved file through the low-level Booster API.
model = xgb.Booster()
model.load_model(MODEL_PATH)

# Evaluate model
X_test_dmatrix = xgb.DMatrix(X_test)
# Booster.predict yields scores, not labels; decode before computing metrics.
y_pred = _to_class_labels(model.predict(X_test_dmatrix))

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average="weighted")
recall = recall_score(y_test, y_pred, average="weighted")
f1 = f1_score(y_test, y_pred, average="weighted")
conf_matrix = confusion_matrix(y_test, y_pred)

print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print("Confusion Matrix:")
print(conf_matrix)


# Define prediction function
def predict(
    age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal
):
    """Predict the heart-disease class for one patient.

    The thirteen inputs are assembled into a single row, passed through the
    fitted imputer and scaler, and scored by the loaded booster.

    Args:
        age, trestbps, chol, thalach, oldpeak, ca: Numeric values from the
            ``gr.Number`` inputs.
        sex, cp, fbs, restecg, exang, slope, thal: Numeric strings from the
            ``gr.Radio`` inputs.
        Any argument may be ``None`` (field left empty); it is then treated
        as missing and filled in by the imputer.

    Returns:
        The predicted class label as an ``int``.

    Raises:
        ValueError: If an argument is a non-numeric string.
    """
    # Combine inputs into a single feature row. Empty fields become NaN so the
    # imputer can fill them instead of the conversion crashing on None.
    # assumes this order matches the column order of the training features
    # -- TODO confirm against heart_disease.data.features.columns.
    features = np.array(
        [
            _as_float(value)
            for value in (
                age,
                sex,
                cp,
                trestbps,
                chol,
                fbs,
                restecg,
                thalach,
                exang,
                oldpeak,
                slope,
                ca,
                thal,
            )
        ],
        dtype=float,
    ).reshape(1, -1)

    # Preprocess the inputs exactly like the training data
    features = scaler.transform(imputer.transform(features))

    # Predict using the trained model and decode the score into a label
    dmatrix = xgb.DMatrix(features)
    prediction = _to_class_labels(model.predict(dmatrix))
    return int(prediction[0])


# Gradio interface
# NOTE(review): the Radio choice values below should be checked against the
# value ranges actually present in the fetched dataset's categorical columns
# -- TODO confirm.
feature_inputs = [
    gr.Number(label="Age (years)"),
    gr.Radio(label="Sex", choices=["0", "1"], type="value"),  # Male: 1, Female: 0
    gr.Radio(label="Chest Pain Type (cp)", choices=["0", "1", "2", "3"], type="value"),
    gr.Number(label="Resting Blood Pressure (mm Hg)"),
    gr.Number(label="Serum Cholestoral (mg/dl)"),
    gr.Radio(
        label="Fasting Blood Sugar > 120 mg/dl (fbs)", choices=["0", "1"], type="value"
    ),
    gr.Radio(
        label="Resting ECG Results (restecg)", choices=["0", "1", "2"], type="value"
    ),
    gr.Number(label="Maximum Heart Rate Achieved (thalach)"),
    gr.Radio(label="Exercise Induced Angina (exang)", choices=["0", "1"], type="value"),
    gr.Number(label="ST Depression Induced by Exercise (oldpeak)"),
    gr.Radio(
        label="Slope of the Peak Exercise ST Segment (slope)",
        choices=["0", "1", "2"],
        type="value",
    ),
    gr.Number(label="Number of Major Vessels Colored by Fluoroscopy (ca)"),
    gr.Radio(label="Thalassemia (thal)", choices=["0", "1", "2", "3"], type="value"),
]

interface = gr.Interface(
    fn=predict,
    inputs=feature_inputs,
    outputs="label",
    title="Heart Disease Prediction",
    description=(
        "Predicts heart disease based on patient information. "
        "Provide the required features to get a diagnosis prediction."
    ),
)

if __name__ == "__main__":
    interface.launch()