Spaces:
Sleeping
Sleeping
import pandas as pd | |
from sklearn.model_selection import train_test_split | |
import numpy as np | |
from sklearn.metrics import accuracy_score | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from sklearn.ensemble import RandomForestClassifier

# Load the dataset from the CSV next to the script and preview its first rows
data = pd.read_csv('Cardio_Vascular_Disease_by_Gut_Microbiota.csv')
print(data.head())

# Target is the CVD_Status column; the feature matrix is every remaining
# column except patient_id (presumably a row identifier, not a predictor).
y = data['CVD_Status']
X = data.drop(columns=['patient_id', 'CVD_Status'])

# Fit a random forest on the full dataset; it is used here only to rank features
rf = RandomForestClassifier(random_state=42)
rf.fit(X, y)
importances = rf.feature_importances_

# One row per feature, ordered from most to least important
feature_importance_df = pd.DataFrame(
    {'Feature': X.columns, 'Importance': importances}
).sort_values(by='Importance', ascending=False)

# Horizontal bar chart of the ranked importances
plt.figure(figsize=(10, 6))
sns.barplot(data=feature_importance_df, x='Importance', y='Feature')
plt.title('Feature Importance from Random Forest')
plt.show()
from sklearn.ensemble import GradientBoostingClassifier | |
from xgboost import XGBClassifier | |
from lightgbm import LGBMClassifier | |
from sklearn.metrics import accuracy_score, confusion_matrix | |
from sklearn.metrics import accuracy_score, confusion_matrix, r2_score, mean_squared_error, mean_absolute_error | |
from math import sqrt | |
# --- Gradient Boosting: train on an 80/20 split and report metrics ---
gradient_boosting = GradientBoostingClassifier(random_state=42)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
# The later model sections reuse these X_train / X_test / y_train / y_test names.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

gradient_boosting.fit(X_train, y_train)

# Hard class labels, predicted once and reused for every label-based metric
y_pred_gb = gradient_boosting.predict(X_test)
accuracy_gb = accuracy_score(y_test, y_pred_gb)
conf_matrix_gb = confusion_matrix(y_test, y_pred_gb)

print(f"Gradient Boosting Accuracy: {accuracy_gb * 100:.2f}%")
print(f"Confusion Matrix:\n {conf_matrix_gb}\n")

# Predicted probability of the second class (column 1 of predict_proba)
y_pred_prob_gb = gradient_boosting.predict_proba(X_test)[:, 1]

# Regression-style error metrics between the labels and the predicted probabilities.
# NOTE(review): this presumes CVD_Status is numeric 0/1 - confirm against the dataset.
r2_gb = r2_score(y_test, y_pred_prob_gb)
mse_gb = mean_squared_error(y_test, y_pred_prob_gb)
rmse_gb = sqrt(mse_gb)  # RMSE is the square root of the MSE computed above
mae_gb = mean_absolute_error(y_test, y_pred_prob_gb)

print(f"Gradient Boosting Accuracy: {accuracy_gb * 100:.2f}%")
print(f"R² Score: {r2_gb:.4f}, RMSE: {rmse_gb:.4f}, MSE: {mse_gb:.4f}, MAE: {mae_gb:.4f}")
print(f"Confusion Matrix:\n {conf_matrix_gb}\n")
# --- XGBoost: train on the shared split and report metrics ---
# NOTE(review): use_label_encoder appears to be deprecated in recent XGBoost
# releases - confirm against the installed version before removing it.
xgboost = XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42)
xgboost.fit(X_train, y_train)

# Hard class labels, predicted once and reused for every label-based metric
y_pred_xgb = xgboost.predict(X_test)
accuracy_xgb = accuracy_score(y_test, y_pred_xgb)
conf_matrix_xgb = confusion_matrix(y_test, y_pred_xgb)

print(f"XGBoost Accuracy: {accuracy_xgb * 100:.2f}%")
print(f"Confusion Matrix:\n {conf_matrix_xgb}\n")

# Predicted probability of the second class (column 1 of predict_proba)
y_pred_prob_xgb = xgboost.predict_proba(X_test)[:, 1]

# Regression-style error metrics between the labels and the predicted probabilities.
# NOTE(review): this presumes CVD_Status is numeric 0/1 - confirm against the dataset.
r2_xgb = r2_score(y_test, y_pred_prob_xgb)
mse_xgb = mean_squared_error(y_test, y_pred_prob_xgb)
rmse_xgb = sqrt(mse_xgb)  # RMSE is the square root of the MSE computed above
mae_xgb = mean_absolute_error(y_test, y_pred_prob_xgb)

print(f"XGBoost Accuracy: {accuracy_xgb * 100:.2f}%")
print(f"R² Score: {r2_xgb:.4f}, RMSE: {rmse_xgb:.4f}, MSE: {mse_xgb:.4f}, MAE: {mae_xgb:.4f}")
print(f"Confusion Matrix:\n {conf_matrix_xgb}\n")
# --- LightGBM: train on the shared split and report metrics ---
lightgbm = LGBMClassifier(random_state=42)
lightgbm.fit(X_train, y_train)

# Hard class labels, predicted once and reused for every label-based metric
y_pred_lgbm = lightgbm.predict(X_test)
accuracy_lgbm = accuracy_score(y_test, y_pred_lgbm)
conf_matrix_lgbm = confusion_matrix(y_test, y_pred_lgbm)

print(f"LightGBM Accuracy: {accuracy_lgbm * 100:.2f}%")
print(f"Confusion Matrix:\n {conf_matrix_lgbm}\n")

# Predicted probability of the second class (column 1 of predict_proba)
y_pred_prob_lgbm = lightgbm.predict_proba(X_test)[:, 1]

# Regression-style error metrics between the labels and the predicted probabilities.
# NOTE(review): this presumes CVD_Status is numeric 0/1 - confirm against the dataset.
r2_lgbm = r2_score(y_test, y_pred_prob_lgbm)
mse_lgbm = mean_squared_error(y_test, y_pred_prob_lgbm)
rmse_lgbm = sqrt(mse_lgbm)  # RMSE is the square root of the MSE computed above
mae_lgbm = mean_absolute_error(y_test, y_pred_prob_lgbm)

print(f"LightGBM Accuracy: {accuracy_lgbm * 100:.2f}%")
print(f"R² Score: {r2_lgbm:.4f}, RMSE: {rmse_lgbm:.4f}, MSE: {mse_lgbm:.4f}, MAE: {mae_lgbm:.4f}")
print(f"Confusion Matrix:\n {conf_matrix_lgbm}\n")
import joblib
from sklearn.ensemble import GradientBoostingClassifier

# Fit a fresh Gradient Boosting classifier on the training split; this is the
# estimator that gets written to disk. fit() returns the estimator itself,
# so construction and training can be chained.
model = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)

# Serialize the fitted estimator with joblib into the working directory
joblib.dump(model, 'trained_model.pkl')
print("Model saved successfully as trained_model.pkl")
def predict_cvd(Age, Gender, BMI, Blood_pressure, cholesterol, Bacteroides_fragilis, Faecalibacterium_prausnitzii,
                Akkermansia_muciniphila, Ruminococcus_bromii, Microbiome_Diversity):
    """Predict cardiovascular-disease status for one patient.

    Builds a single-row DataFrame from the arguments and passes it to the
    module-level ``model``.

    Args:
        Age, BMI, Blood_pressure, cholesterol: Numeric clinical measurements.
        Gender: String; ``'female'`` (any letter case) is encoded as 1 and
            every other string as 0. The 0/1 encoding is an assumption about
            how the training data was coded - TODO confirm.
        Bacteroides_fragilis, Faecalibacterium_prausnitzii,
        Akkermansia_muciniphila, Ruminococcus_bromii, Microbiome_Diversity:
            Numeric gut-microbiota measurements.

    Returns:
        ``"Cardiovascular Disease Detected"`` when the predicted label is 1,
        otherwise ``"No Cardiovascular Disease Detected"``.

    Raises:
        AttributeError: If ``Gender`` is not a string (it has no ``lower``).
    """
    # Convert Gender to numerical (assuming Male: 0, Female: 1)
    gender_code = 1 if Gender.lower() == 'female' else 0

    # Column names must match the feature names the model was fitted with;
    # scikit-learn validates them at predict time. The cholesterol feature is
    # spelled 'Cholesterol' (capital C), so the lowercase parameter is mapped
    # onto that key here.
    # NOTE(review): column order is presumed to match the training frame - confirm.
    input_data = pd.DataFrame({
        'Age': [Age],
        'Gender': [gender_code],
        'BMI': [BMI],
        'Blood_pressure': [Blood_pressure],
        'Cholesterol': [cholesterol],
        'Bacteroides_fragilis': [Bacteroides_fragilis],
        'Faecalibacterium_prausnitzii': [Faecalibacterium_prausnitzii],
        'Akkermansia_muciniphila': [Akkermansia_muciniphila],
        'Ruminococcus_bromii': [Ruminococcus_bromii],
        'Microbiome_Diversity': [Microbiome_Diversity],
    })
    print(input_data)  # Echo the assembled row to stdout for debugging

    # Predict CVD status (0 or 1) for the single row
    prediction = model.predict(input_data)

    return "Cardiovascular Disease Detected" if prediction[0] == 1 else "No Cardiovascular Disease Detected"
import gradio as gr | |
import pandas as pd | |
import joblib | |
# Restore the fitted classifier from the joblib file in the working directory;
# the prediction function reads this module-level `model`.
model = joblib.load(filename='trained_model.pkl')
# Prediction callback used by the Gradio interface
def predict_cvd(Age, Gender, BMI, Blood_pressure, Cholesterol, Bacteroides_fragilis, Faecalibacterium_prausnitzii,
                Akkermansia_muciniphila, Ruminococcus_bromii, Microbiome_Diversity):
    """Predict cardiovascular-disease status for one patient.

    Builds a single-row DataFrame from the arguments and passes it to the
    module-level ``model``. The function never raises: every failure is
    reported as a string so the UI always has text to display.

    Args:
        Age, BMI, Blood_pressure, Cholesterol: Numeric clinical measurements.
        Gender: String; ``'female'`` (any letter case) is encoded as 1 and
            every other string as 0. The 0/1 encoding is an assumption about
            how the training data was coded - TODO confirm.
        Bacteroides_fragilis, Faecalibacterium_prausnitzii,
        Akkermansia_muciniphila, Ruminococcus_bromii, Microbiome_Diversity:
            Numeric gut-microbiota measurements.

    Returns:
        ``"Cardiovascular Disease Detected"`` when the predicted label is 1,
        ``"No Cardiovascular Disease Detected"`` otherwise, or a message
        starting with ``"An error occurred: "`` when the input is invalid or
        prediction fails.
    """
    # Gender may arrive as None (e.g. when no dropdown option has been chosen).
    # Report that explicitly instead of leaking an AttributeError message.
    if not isinstance(Gender, str):
        return "An error occurred: Gender must be 'Male' or 'Female'"

    try:
        # Convert Gender to numerical (assuming Male: 0, Female: 1)
        gender_code = 1 if Gender.lower() == 'female' else 0

        # Column names must match the feature names the model was fitted with
        # (note the capital "C" in 'Cholesterol').
        input_data = pd.DataFrame({
            'Age': [Age],
            'Gender': [gender_code],
            'BMI': [BMI],
            'Blood_pressure': [Blood_pressure],
            'Cholesterol': [Cholesterol],
            'Bacteroides_fragilis': [Bacteroides_fragilis],
            'Faecalibacterium_prausnitzii': [Faecalibacterium_prausnitzii],
            'Akkermansia_muciniphila': [Akkermansia_muciniphila],
            'Ruminococcus_bromii': [Ruminococcus_bromii],
            'Microbiome_Diversity': [Microbiome_Diversity],
        })

        prediction = model.predict(input_data)

        return "Cardiovascular Disease Detected" if prediction[0] == 1 else "No Cardiovascular Disease Detected"
    except Exception as e:
        # Deliberately broad: this is the UI boundary, so any failure is
        # turned into display text rather than an unhandled error.
        return f"An error occurred: {str(e)}"
# Input widgets, listed in the same order as predict_cvd's parameters so each
# component value lands on the matching argument.
inputs = [
    gr.Slider(minimum=18, maximum=100, step=1, value=50, label="Age"),
    gr.Dropdown(choices=['Male', 'Female'], label="Gender"),
    gr.Slider(minimum=10.0, maximum=50.0, step=0.1, value=25.0, label="BMI"),
    gr.Slider(minimum=90, maximum=200, step=1, value=120, label="Blood Pressure"),
    gr.Slider(minimum=100, maximum=300, step=1, value=180, label="Cholesterol"),
]

# The five microbiome inputs share one configuration: range 0-10, step 0.1,
# default 5.0. Only the label differs.
_microbiome_labels = [
    "Bacteroides Fragilis Level",
    "Faecalibacterium Prausnitzii Level",
    "Akkermansia Muciniphila Level",
    "Ruminococcus Bromii Level",
    "Microbiome Diversity",
]
inputs.extend(
    gr.Slider(minimum=0.0, maximum=10.0, step=0.1, value=5.0, label=text)
    for text in _microbiome_labels
)

# Wire the widgets to the prediction function; the result is shown as plain text
iface = gr.Interface(
    fn=predict_cvd,
    inputs=inputs,
    outputs="text",
    title="Cardiovascular Disease Prediction",
)

# Start the web app
iface.launch()