Spaces:

minusquare
/

gradio_app

Sleeping

App Files Files Community

minusquare committed on Oct 29, 2024

Commit

9c68205

verified ·

1 Parent(s): 9ba38e6

Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

.ipynb_checkpoints/gradio_hearttack_app-checkpoint.py +25 -8
.ipynb_checkpoints/gradio_hearttack_app_old-checkpoint.py +83 -0
.ipynb_checkpoints/requirements-checkpoint.txt +1 -241
Untitled.ipynb +3 -37
gradio_hearttack_app.py +23 -6
gradio_hearttack_app_old.py +83 -0
requirements.txt +5 -2
shap_plot.png +0 -0
test_cases +5 -0

.ipynb_checkpoints/gradio_hearttack_app-checkpoint.py CHANGED Viewed

@@ -4,18 +4,24 @@ import joblib
 import numpy as np
 from sklearn.preprocessing import StandardScaler
 import pandas as pd
 # Load the model and the scaler
 model = joblib.load('best_XGB.pkl')
-scaler = joblib.load('scaler.pkl')  # Load the scaler if you saved it during training
 cutoff = 0.42  # Custom cutoff probability
-# Define the prediction function with preprocessing and scaling
 def predict_heart_attack(Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose):
     # Define feature names in the same order as the training data
     feature_names = ['Gender', 'age', 'cigsPerDay', 'BPMeds', 'prevalentHyp', 'diabetes', 'totChol', 'sysBP', 'diaBP', 'BMI', 'heartRate', 'glucose']
     # Create a DataFrame with the correct feature names for prediction
-    features = pd.DataFrame([[Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose]],      columns=feature_names)
     # Standardize the features (scaling)
     scaled_features = scaler.transform(features)
@@ -29,11 +35,19 @@ def predict_heart_attack(Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes
     else:
         prediction_class = 0
     result = f"Predicted Probability: {proba[0]*100:.2f}%. Predicted Class with cutoff {cutoff}: {prediction_class}"
-    return result
-# Create the Gradio interface with preprocessing and prediction logic
 with gr.Blocks() as app:
     with gr.Row():
         with gr.Column():
@@ -41,14 +55,14 @@ with gr.Blocks() as app:
             cigsPerDay = gr.Slider(0, 40, step=1, label="Cigarettes per Day")
             prevalentHyp = gr.Radio([0, 1], label="Prevalent Hypertension (0=No, 1=Yes)")
             totChol = gr.Slider(100, 400, step=1, label="Total Cholesterol in mg/dl")
-            diaBP = gr.Slider(60, 120, step=1, label="Diastolic/Higher BP")
             heartRate = gr.Slider(50, 120, step=1, label="Heart Rate")
         with gr.Column():
             age = gr.Slider(20, 80, step=1, label="Age (years)")
             BPMeds = gr.Radio([0, 1], label="On BP Medications (0=No, 1=Yes)")
             diabetes = gr.Radio([0, 1], label="Diabetes (0=No, 1=Yes)")
-            sysBP = gr.Slider(90, 200, step=1, label="Systolic BP/Lower BP")
             BMI = gr.Slider(15, 40, step=0.1, label="Body Mass Index (BMI) in kg/m2")
             glucose = gr.Slider(50, 250, step=1, label="Fasting Glucose Level")
@@ -59,8 +73,11 @@ with gr.Blocks() as app:
     with gr.Row():
         prediction_output = gr.Textbox(label="", interactive=False, elem_id="prediction_output")
     # Link inputs and prediction output
     submit_btn = gr.Button("Submit")
-    submit_btn.click(fn=predict_heart_attack, inputs=[Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose], outputs=prediction_output)
 app.launch(share = True)

 import numpy as np
 from sklearn.preprocessing import StandardScaler
 import pandas as pd
+import shap
+import matplotlib.pyplot as plt
 # Load the model and the scaler
 model = joblib.load('best_XGB.pkl')
+scaler = joblib.load('scaler.pkl')  # Ensure the scaler is saved and loaded with the same scikit-learn version
 cutoff = 0.42  # Custom cutoff probability
+# Use TreeExplainer for XGBoost models
+explainer = shap.TreeExplainer(model)
+# Define the prediction function with preprocessing, scaling, and SHAP analysis
 def predict_heart_attack(Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose):
     # Define feature names in the same order as the training data
     feature_names = ['Gender', 'age', 'cigsPerDay', 'BPMeds', 'prevalentHyp', 'diabetes', 'totChol', 'sysBP', 'diaBP', 'BMI', 'heartRate', 'glucose']
     # Create a DataFrame with the correct feature names for prediction
+    features = pd.DataFrame([[Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose]], columns=feature_names)
     # Standardize the features (scaling)
     scaled_features = scaler.transform(features)
     else:
         prediction_class = 0
+    # Generate SHAP values for the prediction using the explainer
+    shap_values = explainer(features)
+    # Plot SHAP values
+    plt.figure(figsize=(8, 6))
+    shap.waterfall_plot(shap_values[0])  # Using the SHAP Explanation object
+    plt.savefig('shap_plot.png')  # Save SHAP plot to a file
     result = f"Predicted Probability: {proba[0]*100:.2f}%. Predicted Class with cutoff {cutoff}: {prediction_class}"
+    return result, 'shap_plot.png'  # Return the prediction and SHAP plot
+# Create the Gradio interface with preprocessing, prediction, and SHAP visualization
 with gr.Blocks() as app:
     with gr.Row():
         with gr.Column():
             cigsPerDay = gr.Slider(0, 40, step=1, label="Cigarettes per Day")
             prevalentHyp = gr.Radio([0, 1], label="Prevalent Hypertension (0=No, 1=Yes)")
             totChol = gr.Slider(100, 400, step=1, label="Total Cholesterol in mg/dl")
+            diaBP = gr.Slider(60, 120, step=1, label="Diastolic/Lower BP")
             heartRate = gr.Slider(50, 120, step=1, label="Heart Rate")
         with gr.Column():
             age = gr.Slider(20, 80, step=1, label="Age (years)")
             BPMeds = gr.Radio([0, 1], label="On BP Medications (0=No, 1=Yes)")
             diabetes = gr.Radio([0, 1], label="Diabetes (0=No, 1=Yes)")
+            sysBP = gr.Slider(90, 200, step=1, label="Systolic BP/Higher BP")
             BMI = gr.Slider(15, 40, step=0.1, label="Body Mass Index (BMI) in kg/m2")
             glucose = gr.Slider(50, 250, step=1, label="Fasting Glucose Level")
     with gr.Row():
         prediction_output = gr.Textbox(label="", interactive=False, elem_id="prediction_output")
+    with gr.Row():
+        shap_plot_output = gr.Image(label="SHAP Analysis")
     # Link inputs and prediction output
     submit_btn = gr.Button("Submit")
+    submit_btn.click(fn=predict_heart_attack, inputs=[Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose], outputs=[prediction_output, shap_plot_output])
 app.launch(share = True)

.ipynb_checkpoints/gradio_hearttack_app_old-checkpoint.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import gradio as gr
+import xgboost as xgb
+import joblib
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+import pandas as pd
+import shap
+import matplotlib.pyplot as plt
+# Load the model and the scaler
+# Both artifacts are read via relative paths when this module is executed.
+# NOTE(review): joblib.load unpickles its input — only load files you trust.
+model = joblib.load('best_XGB.pkl')
+scaler = joblib.load('scaler.pkl')  # Load the scaler that was saved during training
+# predict_heart_attack reports class 1 when the predicted probability is >= cutoff
+cutoff = 0.42  # Custom cutoff probability
+# Load SHAP explainer based on your XGBoost model
+# Built once here and reused by every call to predict_heart_attack
+explainer = shap.Explainer(model)
+# Define the prediction function with preprocessing, scaling, and SHAP analysis
+def predict_heart_attack(Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose):
+    """Score one patient and render a SHAP waterfall plot for the prediction.
+
+    The twelve arguments are the raw (unscaled) form values; they are scaled
+    with the loaded ``scaler`` before being passed to ``model``.
+
+    Returns:
+        tuple: ``(result, plot_path)`` where ``result`` is the formatted
+        probability/class message and ``plot_path`` is the path of the saved
+        SHAP waterfall image (``'shap_plot.png'``).
+    """
+    # Define feature names in the same order as the training data
+    feature_names = ['Gender', 'age', 'cigsPerDay', 'BPMeds', 'prevalentHyp', 'diabetes', 'totChol', 'sysBP', 'diaBP', 'BMI', 'heartRate', 'glucose']
+    # Create a DataFrame with the correct feature names for prediction
+    features = pd.DataFrame([[Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose]], columns=feature_names)
+    # Standardize the features (scaling)
+    scaled_features = scaler.transform(features)
+    # Predict probabilities
+    proba = model.predict_proba(scaled_features)[:, 1]  # Probability of class 1 (heart attack)
+    # Apply custom cutoff
+    prediction_class = 1 if proba[0] >= cutoff else 0
+    # Explain the same scaled row the model scored; wrapping it in a DataFrame
+    # lets the plot show feature names instead of positional labels
+    shap_values = explainer(pd.DataFrame(scaled_features, columns=feature_names))
+    # Plot SHAP values
+    plt.figure(figsize=(8, 6))
+    try:
+        # show=False: waterfall_plot would otherwise call plt.show() before the figure is saved
+        shap.waterfall_plot(shap_values[0], show=False)
+        # bbox_inches='tight' keeps the feature-name labels from being clipped
+        plt.savefig('shap_plot.png', bbox_inches='tight')  # Save SHAP plot to a file
+    finally:
+        # Close the figure so repeated submissions do not accumulate open figures
+        plt.close('all')
+    result = f"Predicted Probability: {proba[0]*100:.2f}%. Predicted Class with cutoff {cutoff}: {prediction_class}"
+    return result, 'shap_plot.png'  # Return the prediction and SHAP plot
+# Create the Gradio interface with preprocessing, prediction, and SHAP visualization
+# Layout: one row holding two columns of input widgets, then a heading row,
+# a text row for the prediction message, and an image row for the SHAP plot.
+with gr.Blocks() as app:
+    with gr.Row():
+        # First column of inputs
+        with gr.Column():
+            Gender = gr.Radio([0, 1], label="Gender (0=Female, 1=Male)")
+            cigsPerDay = gr.Slider(0, 40, step=1, label="Cigarettes per Day")
+            prevalentHyp = gr.Radio([0, 1], label="Prevalent Hypertension (0=No, 1=Yes)")
+            totChol = gr.Slider(100, 400, step=1, label="Total Cholesterol in mg/dl")
+            diaBP = gr.Slider(60, 120, step=1, label="Diastolic/Lower BP")
+            heartRate = gr.Slider(50, 120, step=1, label="Heart Rate")
+        # Second column of inputs
+        with gr.Column():
+            age = gr.Slider(20, 80, step=1, label="Age (years)")
+            BPMeds = gr.Radio([0, 1], label="On BP Medications (0=No, 1=Yes)")
+            diabetes = gr.Radio([0, 1], label="Diabetes (0=No, 1=Yes)")
+            sysBP = gr.Slider(90, 200, step=1, label="Systolic BP/Higher BP")
+            BMI = gr.Slider(15, 40, step=0.1, label="Body Mass Index  (weight in kg/ height in meter squared)(BMI) in kg/m2")
+            glucose = gr.Slider(50, 250, step=1, label="Fasting Glucose Level")
+    # Center-aligned prediction output
+    with gr.Row():
+        gr.HTML("<div style='text-align: center; width: 100%'>Heart Attack Prediction</div>")
+    with gr.Row():
+        prediction_output = gr.Textbox(label="", interactive=False, elem_id="prediction_output")
+    with gr.Row():
+        shap_plot_output = gr.Image(label="SHAP Analysis")
+    # Link inputs and prediction output
+    submit_btn = gr.Button("Submit")
+    # inputs follow predict_heart_attack's parameter order (not the on-screen order);
+    # the two outputs receive the function's (result, plot path) return values
+    submit_btn.click(fn=predict_heart_attack, inputs=[Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose], outputs=[prediction_output, shap_plot_output])
+# Start serving the app when this script is executed
+app.launch()

.ipynb_checkpoints/requirements-checkpoint.txt CHANGED Viewed

@@ -1,247 +1,7 @@
-absl-py==2.1.0
-aiofiles==23.2.1
-alembic==1.13.3
-altair==5.3.0
-aniso8601==9.0.1
-annotated-types==0.7.0
-anyio==4.6.2.post1
-asn1crypto==1.5.1
-asttokens==2.4.1
-astunparse==1.6.3
-attrs==23.2.0
-Automat==22.10.0
-bayesian-optimization==1.4.3
-beautifulsoup4==4.12.3
-blinker==1.8.2
-cachetools==5.3.3
-certifi==2024.2.2
-cffi==1.16.0
-charset-normalizer==3.3.2
-chromedriver-autoinstaller==0.6.4
-click==8.1.7
-cloudpickle==3.1.0
-colorama==0.4.6
-comm==0.2.2
-constantly==23.10.4
-contourpy==1.2.1
-convertdate==2.4.0
-cryptography==43.0.0
-cssselect==1.2.0
-cycler==0.12.1
-Cython==3.0.10
-dash==2.17.0
-dash-core-components==2.0.0
-dash-html-components==2.0.0
-dash-table==5.0.0
-databricks-sdk==0.34.0
-dearpygui==1.11.1
-debugpy==1.8.1
-decorator==5.1.1
-defusedxml==0.7.1
-Deprecated==1.2.14
-dnspython==2.6.1
-docker==7.1.0
-docutils==0.21.2
-et-xmlfile==1.1.0
-executing==2.0.1
-fakeredis==2.23.2
-fastapi==0.115.2
-fastjsonschema==2.19.1
-ffmpy==0.4.0
-filelock==3.15.4
-Flask==3.0.3
-flatbuffers==24.3.25
-fonttools==4.51.0
-frozendict==2.4.4
-fsspec==2024.9.0
-gast==0.5.4
-gitdb==4.0.11
-GitPython==3.1.43
-google-auth==2.35.0
-google-pasta==0.2.0
 gradio==5.1.0
 gradio_client==1.4.0
-graphene==3.3
-graphql-core==3.2.5
-graphql-relay==3.2.0
-graphviz==0.20.3
-greenlet==3.0.3
-grpcio==1.64.1
-gunicorn==23.0.0
-h11==0.14.0
-h5py==3.11.0
-holidays==0.53
-html5lib==1.1
-httpcore==1.0.6
-httpx==0.27.2
-huggingface-hub==0.25.2
-hyperlink==21.0.0
-idna==3.7
-imbalanced-learn==0.12.4
-imblearn==0.0
-importlib_metadata==7.1.0
-incremental==24.7.0
-install==1.3.5
-ipykernel==6.29.4
-ipython==8.24.0
-itemadapter==0.9.0
-itemloaders==1.3.1
-itsdangerous==2.2.0
-jedi==0.19.1
-Jinja2==3.1.4
-jmespath==1.0.1
 joblib==1.4.2
-jsonschema==4.22.0
-jsonschema-specifications==2023.12.1
-jupyter_client==8.6.1
-jupyter_core==5.7.2
-keras==3.3.3
-Kivy==2.3.0
-Kivy-Garden==0.1.5
-kiwisolver==1.4.5
-libclang==18.1.1
-lxml==5.2.2
-Mako==1.3.5
-Markdown==3.6
-markdown-it-py==3.0.0
-MarkupSafe==2.1.5
-matplotlib==3.8.4
-matplotlib-inline==0.1.7
-mdurl==0.1.2
-ml-dtypes==0.3.2
-mlflow==2.17.0
-mlflow-skinny==2.17.0
-mlxtend==0.23.1
-multitasking==0.0.11
-namex==0.0.8
-nbformat==5.10.4
-nest-asyncio==1.6.0
-networkx==3.3
 numpy==1.26.4
-nvidia-cublas-cu12==12.3.4.1
-nvidia-cuda-cupti-cu12==12.3.101
-nvidia-cuda-nvcc-cu12==12.3.107
-nvidia-cuda-nvrtc-cu12==12.3.107
-nvidia-cuda-runtime-cu12==12.3.101
-nvidia-cudnn-cu12==8.9.7.29
-nvidia-cufft-cu12==11.0.12.1
-nvidia-curand-cu12==10.3.4.107
-nvidia-cusolver-cu12==11.5.4.101
-nvidia-cusparse-cu12==12.2.0.103
-nvidia-nccl-cu12==2.19.3
-nvidia-nvjitlink-cu12==12.3.101
-openpyxl==3.1.2
-opentelemetry-api==1.27.0
-opentelemetry-sdk==1.27.0
-opentelemetry-semantic-conventions==0.48b0
-opt-einsum==3.3.0
-optree==0.11.0
-orjson==3.10.7
-outcome==1.3.0.post0
-packaging==24.0
 pandas==2.2.2
-parsel==1.9.1
-parso==0.8.4
-patsy==0.5.6
-peewee==3.17.5
-pexpect==4.9.0
-pg8000==1.31.2
-pillow==10.3.0
-platformdirs==4.2.1
-plotly==5.22.0
-pmdarima==2.0.4
-prompt-toolkit==3.0.43
-Protego==0.3.1
-protobuf==4.25.3
-psutil==5.9.8
-ptyprocess==0.7.0
-pure-eval==0.2.2
-pyarrow==16.1.0
-pyasn1==0.6.0
-pyasn1_modules==0.4.0
-pycparser==2.22
-pydantic==2.9.2
-pydantic_core==2.23.4
-pydeck==0.9.1
-PyDispatcher==2.0.7
-pydot==2.0.0
-pydub==0.25.1
-Pygments==2.18.0
-PyMeeus==0.5.12
-pymongo==4.7.3
-pyOpenSSL==24.2.1
-pyparsing==3.1.2
-PySocks==1.7.1
-pystan==2.19.1.1
-python-dateutil==2.9.0.post0
-python-dotenv==1.0.1
-python-multipart==0.0.12
-pytz==2024.1
-PyYAML==6.0.2
-pyzmq==26.0.3
-queuelib==1.7.0
-redis==5.0.6
-referencing==0.35.1
-requests==2.31.0
-requests-file==2.1.0
-retrying==1.3.4
-rich==13.7.1
-rpds-py==0.18.1
-rsa==4.9
-ruff==0.7.0
 scikit-learn==1.4.2
-scipy==1.13.0
-scramp==1.4.5
-Scrapy==2.11.2
-seaborn==0.13.2
-selenium==4.23.1
-semantic-version==2.10.0
-service-identity==24.1.0
-shellingham==1.5.4
-six==1.16.0
-smmap==5.0.1
-sniffio==1.3.1
-sortedcontainers==2.4.0
-soupsieve==2.5
-SQLAlchemy==2.0.31
-sqlparse==0.5.1
-stack-data==0.6.3
-starlette==0.40.0
-statsmodels==0.14.2
-streamlit==1.36.0
-tenacity==8.3.0
-tensorboard==2.17.1
-tensorboard-data-server==0.7.2
-tensorflow==2.17.0
-tensorflow-io-gcs-filesystem==0.37.0
-termcolor==2.4.0
-threadpoolctl==3.5.0
-tldextract==5.1.2
-toml==0.10.2
-tomlkit==0.12.0
-toolz==0.12.1
-tornado==6.4
-tqdm==4.66.5
-traitlets==5.14.3
-trio==0.26.0
-trio-websocket==0.11.1
-Twisted==24.3.0
-typer==0.12.5
-typing_extensions==4.11.0
-tzdata==2024.1
-urllib3==2.2.1
-uvicorn==0.32.0
-w3lib==2.2.1
-watchdog==4.0.1
-wcwidth==0.2.13
-webdriver-manager==4.0.2
-webencodings==0.5.1
-websocket-client==1.8.0
-websockets==12.0
-Werkzeug==3.0.3
-wrapt==1.16.0
-wsproto==1.2.0
-xgboost==2.0.3
-yfinance==0.2.40
-zipp==3.19.0
-zope.interface==6.4.post2

 gradio==5.1.0
 gradio_client==1.4.0
 joblib==1.4.2
 numpy==1.26.4
 pandas==2.2.2
 scikit-learn==1.4.2
+xgboost==2.0.3

Untitled.ipynb CHANGED Viewed

@@ -1,43 +1,9 @@
 {
  "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "b1110063-e160-456d-ae0b-80d9cae3b8a5",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ecube/basicds_py311/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
-   "source": [
-    "import gradio as gr\n",
-    "import xgboost as xgb\n",
-    "import joblib\n",
-    "import numpy as np\n",
-    "from sklearn.preprocessing import StandardScaler\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "5e2655ee-1663-44f1-b05c-708b32c23e6a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip freeze >> requirements.txt"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "c71ec40a-634c-4735-bba9-31da888e3a5b",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -45,9 +11,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "basicds_py311",
    "language": "python",
-   "name": "basicds_py311"
   },
   "language_info": {
    "codemirror_mode": {

 {
  "cells": [
   {
    "cell_type": "code",
    "execution_count": null,
+   "id": "0ab0b011-35e0-4bef-a713-eb4a49b4e8a1",
    "metadata": {},
    "outputs": [],
    "source": []
  ],
  "metadata": {
   "kernelspec": {
+   "display_name": "heart_disease_prediction",
    "language": "python",
+   "name": "heart_disease_prediction"
   },
   "language_info": {
    "codemirror_mode": {

gradio_hearttack_app.py CHANGED Viewed

@@ -4,18 +4,24 @@ import joblib
 import numpy as np
 from sklearn.preprocessing import StandardScaler
 import pandas as pd
 # Load the model and the scaler
 model = joblib.load('best_XGB.pkl')
-scaler = joblib.load('scaler.pkl')  # Load the scaler if you saved it during training
 cutoff = 0.42  # Custom cutoff probability
-# Define the prediction function with preprocessing and scaling
 def predict_heart_attack(Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose):
     # Define feature names in the same order as the training data
     feature_names = ['Gender', 'age', 'cigsPerDay', 'BPMeds', 'prevalentHyp', 'diabetes', 'totChol', 'sysBP', 'diaBP', 'BMI', 'heartRate', 'glucose']
     # Create a DataFrame with the correct feature names for prediction
-    features = pd.DataFrame([[Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose]],      columns=feature_names)
     # Standardize the features (scaling)
     scaled_features = scaler.transform(features)
@@ -29,11 +35,19 @@ def predict_heart_attack(Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes
     else:
         prediction_class = 0
     result = f"Predicted Probability: {proba[0]*100:.2f}%. Predicted Class with cutoff {cutoff}: {prediction_class}"
-    return result
-# Create the Gradio interface with preprocessing and prediction logic
 with gr.Blocks() as app:
     with gr.Row():
         with gr.Column():
@@ -59,8 +73,11 @@ with gr.Blocks() as app:
     with gr.Row():
         prediction_output = gr.Textbox(label="", interactive=False, elem_id="prediction_output")
     # Link inputs and prediction output
     submit_btn = gr.Button("Submit")
-    submit_btn.click(fn=predict_heart_attack, inputs=[Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose], outputs=prediction_output)
 app.launch(share = True)

 import numpy as np
 from sklearn.preprocessing import StandardScaler
 import pandas as pd
+import shap
+import matplotlib.pyplot as plt
 # Load the model and the scaler
 model = joblib.load('best_XGB.pkl')
+scaler = joblib.load('scaler.pkl')  # Ensure the scaler is saved and loaded with the same scikit-learn version
 cutoff = 0.42  # Custom cutoff probability
+# Use TreeExplainer for XGBoost models
+explainer = shap.TreeExplainer(model)
+# Define the prediction function with preprocessing, scaling, and SHAP analysis
 def predict_heart_attack(Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose):
     # Define feature names in the same order as the training data
     feature_names = ['Gender', 'age', 'cigsPerDay', 'BPMeds', 'prevalentHyp', 'diabetes', 'totChol', 'sysBP', 'diaBP', 'BMI', 'heartRate', 'glucose']
     # Create a DataFrame with the correct feature names for prediction
+    features = pd.DataFrame([[Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose]], columns=feature_names)
     # Standardize the features (scaling)
     scaled_features = scaler.transform(features)
     else:
         prediction_class = 0
+    # Generate SHAP values for the prediction using the explainer
+    shap_values = explainer(features)
+    # Plot SHAP values
+    plt.figure(figsize=(8, 6))
+    shap.waterfall_plot(shap_values[0])  # Using the SHAP Explanation object
+    plt.savefig('shap_plot.png')  # Save SHAP plot to a file
     result = f"Predicted Probability: {proba[0]*100:.2f}%. Predicted Class with cutoff {cutoff}: {prediction_class}"
+    return result, 'shap_plot.png'  # Return the prediction and SHAP plot
+# Create the Gradio interface with preprocessing, prediction, and SHAP visualization
 with gr.Blocks() as app:
     with gr.Row():
         with gr.Column():
     with gr.Row():
         prediction_output = gr.Textbox(label="", interactive=False, elem_id="prediction_output")
+    with gr.Row():
+        shap_plot_output = gr.Image(label="SHAP Analysis")
     # Link inputs and prediction output
     submit_btn = gr.Button("Submit")
+    submit_btn.click(fn=predict_heart_attack, inputs=[Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose], outputs=[prediction_output, shap_plot_output])
 app.launch(share = True)

gradio_hearttack_app_old.py ADDED Viewed

	@@ -0,0 +1,83 @@

+import gradio as gr
+import xgboost as xgb
+import joblib
+import numpy as np
+from sklearn.preprocessing import StandardScaler
+import pandas as pd
+import shap
+import matplotlib.pyplot as plt
+# Load the model and the scaler
+# Both artifacts are read via relative paths when this module is executed.
+# NOTE(review): joblib.load unpickles its input — only load files you trust.
+model = joblib.load('best_XGB.pkl')
+scaler = joblib.load('scaler.pkl')  # Load the scaler that was saved during training
+# predict_heart_attack reports class 1 when the predicted probability is >= cutoff
+cutoff = 0.42  # Custom cutoff probability
+# Load SHAP explainer based on your XGBoost model
+# Built once here and reused by every call to predict_heart_attack
+explainer = shap.Explainer(model)
+# Define the prediction function with preprocessing, scaling, and SHAP analysis
+def predict_heart_attack(Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose):
+    """Score one patient and render a SHAP waterfall plot for the prediction.
+
+    The twelve arguments are the raw (unscaled) form values; they are scaled
+    with the loaded ``scaler`` before being passed to ``model``.
+
+    Returns:
+        tuple: ``(result, plot_path)`` where ``result`` is the formatted
+        probability/class message and ``plot_path`` is the path of the saved
+        SHAP waterfall image (``'shap_plot.png'``).
+    """
+    # Define feature names in the same order as the training data
+    feature_names = ['Gender', 'age', 'cigsPerDay', 'BPMeds', 'prevalentHyp', 'diabetes', 'totChol', 'sysBP', 'diaBP', 'BMI', 'heartRate', 'glucose']
+    # Create a DataFrame with the correct feature names for prediction
+    features = pd.DataFrame([[Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose]], columns=feature_names)
+    # Standardize the features (scaling)
+    scaled_features = scaler.transform(features)
+    # Predict probabilities
+    proba = model.predict_proba(scaled_features)[:, 1]  # Probability of class 1 (heart attack)
+    # Apply custom cutoff
+    prediction_class = 1 if proba[0] >= cutoff else 0
+    # Explain the same scaled row the model scored; wrapping it in a DataFrame
+    # lets the plot show feature names instead of positional labels
+    shap_values = explainer(pd.DataFrame(scaled_features, columns=feature_names))
+    # Plot SHAP values
+    plt.figure(figsize=(8, 6))
+    try:
+        # show=False: waterfall_plot would otherwise call plt.show() before the figure is saved
+        shap.waterfall_plot(shap_values[0], show=False)
+        # bbox_inches='tight' keeps the feature-name labels from being clipped
+        plt.savefig('shap_plot.png', bbox_inches='tight')  # Save SHAP plot to a file
+    finally:
+        # Close the figure so repeated submissions do not accumulate open figures
+        plt.close('all')
+    result = f"Predicted Probability: {proba[0]*100:.2f}%. Predicted Class with cutoff {cutoff}: {prediction_class}"
+    return result, 'shap_plot.png'  # Return the prediction and SHAP plot
+# Create the Gradio interface with preprocessing, prediction, and SHAP visualization
+# Layout: one row holding two columns of input widgets, then a heading row,
+# a text row for the prediction message, and an image row for the SHAP plot.
+with gr.Blocks() as app:
+    with gr.Row():
+        # First column of inputs
+        with gr.Column():
+            Gender = gr.Radio([0, 1], label="Gender (0=Female, 1=Male)")
+            cigsPerDay = gr.Slider(0, 40, step=1, label="Cigarettes per Day")
+            prevalentHyp = gr.Radio([0, 1], label="Prevalent Hypertension (0=No, 1=Yes)")
+            totChol = gr.Slider(100, 400, step=1, label="Total Cholesterol in mg/dl")
+            diaBP = gr.Slider(60, 120, step=1, label="Diastolic/Lower BP")
+            heartRate = gr.Slider(50, 120, step=1, label="Heart Rate")
+        # Second column of inputs
+        with gr.Column():
+            age = gr.Slider(20, 80, step=1, label="Age (years)")
+            BPMeds = gr.Radio([0, 1], label="On BP Medications (0=No, 1=Yes)")
+            diabetes = gr.Radio([0, 1], label="Diabetes (0=No, 1=Yes)")
+            sysBP = gr.Slider(90, 200, step=1, label="Systolic BP/Higher BP")
+            BMI = gr.Slider(15, 40, step=0.1, label="Body Mass Index  (weight in kg/ height in meter squared)(BMI) in kg/m2")
+            glucose = gr.Slider(50, 250, step=1, label="Fasting Glucose Level")
+    # Center-aligned prediction output
+    with gr.Row():
+        gr.HTML("<div style='text-align: center; width: 100%'>Heart Attack Prediction</div>")
+    with gr.Row():
+        prediction_output = gr.Textbox(label="", interactive=False, elem_id="prediction_output")
+    with gr.Row():
+        shap_plot_output = gr.Image(label="SHAP Analysis")
+    # Link inputs and prediction output
+    submit_btn = gr.Button("Submit")
+    # inputs follow predict_heart_attack's parameter order (not the on-screen order);
+    # the two outputs receive the function's (result, plot path) return values
+    submit_btn.click(fn=predict_heart_attack, inputs=[Gender, age, cigsPerDay, BPMeds, prevalentHyp, diabetes, totChol, sysBP, diaBP, BMI, heartRate, glucose], outputs=[prediction_output, shap_plot_output])
+# Start serving the app when this script is executed
+app.launch()

requirements.txt CHANGED Viewed

@@ -1,7 +1,10 @@
 gradio==5.1.0
 gradio_client==1.4.0
 joblib==1.4.2
 numpy==1.26.4
-pandas
 scikit-learn==1.4.2
-xgboost

+cloudpickle==3.1.0
 gradio==5.1.0
 gradio_client==1.4.0
 joblib==1.4.2
 numpy==1.26.4
+pandas==2.2.2
+shap==0.46.0
 scikit-learn==1.4.2
+slicer==0.0.8
+xgboost==2.0.3

shap_plot.png ADDED Viewed

test_cases ADDED Viewed

	@@ -0,0 +1,5 @@

+Gender 1 age 62.729653 cigsPerDay 0 BPMeds 0 prevalentHyp 1 diabetes 0 totChol 172 sysBP 144 diaBP 84 BMI 26 heartRate 63 glucose 78.3 Predicted Probability (first observation): 59% Predicted Class with cutoff 0.42: 1 Stored y_test, y_proba, and custom predictions to disk.
+Gender 1 age 49.000000 cigsPerDay 0 BPMeds 0 prevalentHyp 1 diabetes 0 totChol 170 sysBP 112 diaBP 79 BMI 21 heartRate 60 glucose 80
+Predicted Probability (fifth observation): 0.13% Predicted Class with cutoff 0.42: 0