"""Train tree-ensemble classifiers for cardiovascular disease (CVD) prediction.

The script runs top to bottom:

1. Load the gut-microbiota dataset and plot random-forest feature importances.
2. Fit Gradient Boosting, XGBoost and LightGBM on an 80/20 split and print
   accuracy, confusion matrix and probability-error metrics for each.
3. Persist the Gradient Boosting model with joblib and reload it as ``model``.
4. Serve ``predict_cvd`` through a Gradio interface.
"""

from math import sqrt

import gradio as gr
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from lightgbm import LGBMClassifier
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

DATA_PATH = 'Cardio_Vascular_Disease_by_Gut_Microbiota.csv'
MODEL_PATH = 'trained_model.pkl'
RANDOM_STATE = 42


def _report_metrics(name, y_true, y_pred, y_prob):
    """Print and return the evaluation metrics for one fitted classifier.

    Args:
        name: Label used in the printed accuracy line.
        y_true: Ground-truth labels of the evaluation set.
        y_pred: Predicted class labels for the same rows.
        y_prob: Predicted probability of the second class (column 1 of
            ``predict_proba``) for the same rows.

    Returns:
        Tuple ``(accuracy, conf_matrix, r2, rmse, mse, mae)``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    conf_matrix = confusion_matrix(y_true, y_pred)

    # The regression-style metrics compare the labels with the predicted
    # probabilities, not with the hard class predictions.
    # NOTE(review): assuming the labels are encoded 0/1, the MSE here is the
    # Brier score -- confirm the encoding of CVD_Status.
    r2 = r2_score(y_true, y_prob)
    mse = mean_squared_error(y_true, y_prob)
    rmse = sqrt(mse)
    mae = mean_absolute_error(y_true, y_prob)

    print(f"{name} Accuracy: {accuracy * 100:.2f}%")
    print(f"R² Score: {r2:.4f}, RMSE: {rmse:.4f}, MSE: {mse:.4f}, MAE: {mae:.4f}")
    print(f"Confusion Matrix:\n {conf_matrix}\n")

    return accuracy, conf_matrix, r2, rmse, mse, mae


def predict_cvd(Age, Gender, BMI, Blood_pressure, Cholesterol,
                Bacteroides_fragilis, Faecalibacterium_prausnitzii,
                Akkermansia_muciniphila, Ruminococcus_bromii,
                Microbiome_Diversity):
    """Predict CVD status for one patient with the module-level ``model``.

    Args:
        Age, BMI, Blood_pressure, Cholesterol: Clinical measurements.
        Gender: ``'Male'`` or ``'Female'`` (case-insensitive). ``'female'``
            is encoded as 1 and any other string as 0.
        Bacteroides_fragilis, Faecalibacterium_prausnitzii,
        Akkermansia_muciniphila, Ruminococcus_bromii, Microbiome_Diversity:
            Gut-microbiota measurements.

    Returns:
        A human-readable result string. Failures are reported as a string
        starting with ``"An error occurred:"`` instead of being raised, so
        the Gradio text output always has something to show.
    """
    # The dropdown has no default value, so Gender may arrive as None.
    if not isinstance(Gender, str):
        return "An error occurred: please select a gender (Male or Female)"

    try:
        # NOTE(review): assumes the training data encodes Male as 0 and
        # Female as 1 -- confirm against the dataset.
        gender_code = 1 if Gender.lower() == 'female' else 0

        input_data = pd.DataFrame({
            'Age': [Age],
            'Gender': [gender_code],
            'BMI': [BMI],
            'Blood_pressure': [Blood_pressure],
            'Cholesterol': [Cholesterol],
            'Bacteroides_fragilis': [Bacteroides_fragilis],
            'Faecalibacterium_prausnitzii': [Faecalibacterium_prausnitzii],
            'Akkermansia_muciniphila': [Akkermansia_muciniphila],
            'Ruminococcus_bromii': [Ruminococcus_bromii],
            'Microbiome_Diversity': [Microbiome_Diversity],
        })

        # Align columns with the names the model was fitted on, so the
        # prediction does not depend on the dict order above. A missing
        # column raises KeyError, which is reported below.
        expected_columns = getattr(model, 'feature_names_in_', None)
        if expected_columns is not None:
            input_data = input_data[list(expected_columns)]

        prediction = model.predict(input_data)
    except Exception as e:  # UI boundary: show the failure to the user.
        return f"An error occurred: {str(e)}"

    if prediction[0] == 1:
        return "Cardiovascular Disease Detected"
    return "No Cardiovascular Disease Detected"


# --- Load data -------------------------------------------------------------
data = pd.read_csv(DATA_PATH)
print(data.head())

# Define features and target
X = data.drop(columns=['patient_id', 'CVD_Status'])
y = data['CVD_Status']

# --- Feature importance from a random forest fitted on the full dataset ----
rf = RandomForestClassifier(random_state=RANDOM_STATE)
rf.fit(X, y)
importances = rf.feature_importances_

feature_importance_df = pd.DataFrame({'Feature': X.columns, 'Importance': importances})
feature_importance_df = feature_importance_df.sort_values('Importance', ascending=False)

plt.figure(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=feature_importance_df)
plt.title('Feature Importance from Random Forest')
plt.show()

# --- Train / test split (80% train, 20% test) ------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)

# --- Gradient Boosting -----------------------------------------------------
gradient_boosting = GradientBoostingClassifier(random_state=RANDOM_STATE)
gradient_boosting.fit(X_train, y_train)
y_pred_gb = gradient_boosting.predict(X_test)
y_pred_prob_gb = gradient_boosting.predict_proba(X_test)[:, 1]
accuracy_gb, conf_matrix_gb, r2_gb, rmse_gb, mse_gb, mae_gb = _report_metrics(
    "Gradient Boosting", y_test, y_pred_gb, y_pred_prob_gb
)

# --- XGBoost ---------------------------------------------------------------
# NOTE(review): newer xgboost releases appear to ignore `use_label_encoder`;
# it is kept for older versions -- confirm against the installed release.
xgboost = XGBClassifier(
    use_label_encoder=False, eval_metric='logloss', random_state=RANDOM_STATE
)
xgboost.fit(X_train, y_train)
y_pred_xgb = xgboost.predict(X_test)
y_pred_prob_xgb = xgboost.predict_proba(X_test)[:, 1]
accuracy_xgb, conf_matrix_xgb, r2_xgb, rmse_xgb, mse_xgb, mae_xgb = _report_metrics(
    "XGBoost", y_test, y_pred_xgb, y_pred_prob_xgb
)

# --- LightGBM --------------------------------------------------------------
lightgbm = LGBMClassifier(random_state=RANDOM_STATE)
lightgbm.fit(X_train, y_train)
y_pred_lgbm = lightgbm.predict(X_test)
y_pred_prob_lgbm = lightgbm.predict_proba(X_test)[:, 1]
accuracy_lgbm, conf_matrix_lgbm, r2_lgbm, rmse_lgbm, mse_lgbm, mae_lgbm = _report_metrics(
    "LightGBM", y_test, y_pred_lgbm, y_pred_prob_lgbm
)

# --- Persist and reload the model that backs the web UI --------------------
# The fitted Gradient Boosting model is saved directly. Refitting a second
# estimator with the same seed on the same split would only repeat the work.
joblib.dump(gradient_boosting, MODEL_PATH)
print(f"Model saved successfully as {MODEL_PATH}")

# joblib files are pickle-based: only load files this script wrote itself.
model = joblib.load(MODEL_PATH)

# --- Gradio interface ------------------------------------------------------
# Input order must match the parameter order of predict_cvd.
inputs = [
    gr.Slider(18, 100, step=1, value=50, label="Age"),
    gr.Dropdown(['Male', 'Female'], label="Gender"),
    gr.Slider(10.0, 50.0, step=0.1, value=25.0, label="BMI"),
    gr.Slider(90, 200, step=1, value=120, label="Blood Pressure"),
    gr.Slider(100, 300, step=1, value=180, label="Cholesterol"),
    gr.Slider(0.0, 10.0, step=0.1, value=5.0, label="Bacteroides Fragilis Level"),
    gr.Slider(0.0, 10.0, step=0.1, value=5.0, label="Faecalibacterium Prausnitzii Level"),
    gr.Slider(0.0, 10.0, step=0.1, value=5.0, label="Akkermansia Muciniphila Level"),
    gr.Slider(0.0, 10.0, step=0.1, value=5.0, label="Ruminococcus Bromii Level"),
    gr.Slider(0.0, 10.0, step=0.1, value=5.0, label="Microbiome Diversity"),
]

iface = gr.Interface(
    fn=predict_cvd,
    inputs=inputs,
    outputs="text",
    title="Cardiovascular Disease Prediction",
)

iface.launch()