Spaces:

alessio21
/

ecg

Sleeping

App Files Files Community

alessio21 committed on Nov 2, 2023

Commit

36c3e63

1 Parent(s): 95eddc1

Upload run.ipynb

Browse files

Files changed (1) hide show

run.ipynb +653 -0

run.ipynb ADDED Viewed

	@@ -0,0 +1,653 @@

+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "provenance": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": 3,
+      "metadata": {
+        "id": "yowZ_FwQ53s6"
+      },
+      "outputs": [],
+      "source": [
+        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
+        "%pip install -q seaborn plotly sentence-transformers prince gradio==3.41.2"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "import matplotlib.pyplot as plt\n",
+        "import numpy as np\n",
+        "import pandas as pd\n",
+        "import os\n",
+        "import tensorflow as tf\n",
+        "from tensorflow import keras\n",
+        "import seaborn as sns\n",
+        "\n",
+        "from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score\n",
+        "from sklearn.metrics import f1_score, confusion_matrix, precision_recall_curve, roc_curve\n",
+        "from sklearn.metrics import ConfusionMatrixDisplay\n",
+        "\n",
+        "from sklearn.model_selection import train_test_split\n",
+        "from tensorflow.keras import layers, losses\n",
+        "from tensorflow.keras.datasets import fashion_mnist\n",
+        "from tensorflow.keras.models import Model\n",
+        "\n",
+        "from plotly.subplots import make_subplots\n",
+        "import plotly.graph_objects as go\n",
+        "\n",
+        "from sklearn.decomposition import PCA\n",
+        "\n",
+        "import plotly.express as px\n",
+        "from scipy.interpolate import griddata\n",
+        "import sklearn\n",
+        "from sklearn.tree import DecisionTreeClassifier\n",
+        "from sklearn.metrics import confusion_matrix, precision_score, roc_auc_score, precision_recall_curve\n",
+        "from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, cross_val_predict, StratifiedKFold\n",
+        "from sentence_transformers import SentenceTransformer\n",
+        "\n",
+        "from sklearn import tree\n",
+        "\n",
+        "\n",
+        "import gradio as gr\n",
+        "import os\n",
+        "import json\n",
+        "from datetime import datetime, timedelta\n",
+        "import shutil\n",
+        "import random\n",
+        "import plotly.io as pio\n",
+        "\n",
+        "import joblib\n",
+        "\n",
+        "\n",
+        "\n",
+        "#load models\n",
+        "autoencoder = keras.models.load_model('models/autoencoder')\n",
+        "classifier = keras.models.load_model('models/classifier')\n",
+        "decision_tree = joblib.load(\"models/decision_tree_model.pkl\")\n",
+        "llm_model = SentenceTransformer(r\"sentence-transformers/paraphrase-MiniLM-L6-v2\")\n",
+        "\n",
+        "pca_2d_llm_clusters = joblib.load('models/pca_llm_model.pkl')\n",
+        "\n",
+        "print(\"models loaded\")\n",
+        "\n",
+        "\n",
+        "\n",
+        "#compute training dataset constant (min and max) for data normalization\n",
+        "\n",
+        "dataframe = pd.read_csv('ecg.csv', header=None)\n",
+        "dataframe[140] = dataframe[140].apply(lambda x: 1 if x==0 else 0)\n",
+        "\n",
+        "df_ecg = dataframe[[i for i in range(140)]]\n",
+        "ecg_raw_data = df_ecg.values\n",
+        "labels = dataframe.values[:, -1]\n",
+        "ecg_data = ecg_raw_data[:, :]\n",
+        "train_data, test_data, train_labels, test_labels = train_test_split(\n",
+        "    ecg_data, labels, test_size=0.2, random_state=21)\n",
+        "\n",
+        "min_val = tf.reduce_min(train_data)\n",
+        "max_val = tf.reduce_max(train_data)\n",
+        "\n",
+        "print(\"constant computing: OK\")\n",
+        "\n",
+        "\n",
+        "#compute PCA for latent space representation\n",
+        "\n",
+        "ecg_data = (ecg_data - min_val) / (max_val - min_val)\n",
+        "\n",
+        "ecg_data = tf.cast(ecg_data, tf.float32)\n",
+        "\n",
+        "print(ecg_data.shape)\n",
+        "X = autoencoder.encoder(ecg_data).numpy()\n",
+        "\n",
+        "n_components=2\n",
+        "pca = PCA(n_components=n_components)\n",
+        "X_compressed = pca.fit_transform(X)\n",
+        "\n",
+        "\n",
+        "column_names = [f\"Feature{i + 1}\" for i in range(n_components)]\n",
+        "categories = [\"normal\",\"heart disease\"]\n",
+        "target_categorical = pd.Categorical.from_codes(labels.astype(int), categories=categories)\n",
+        "df_compressed = pd.DataFrame(X_compressed, columns=column_names)\n",
+        "df_compressed[\"target\"] = target_categorical\n",
+        "\n",
+        "print(\"PCA: done\")\n",
+        "\n",
+        "\n",
+        "#load dataset for decision tree map plot\n",
+        "df_plot = pd.read_csv(\"df_mappa.csv\", sep=\",\", header=0)\n",
+        "print(\"df map for decision tree loaded.\")\n",
+        "\n",
+        "#load dataset form llm pca\n",
+        "df_pca_llm = pd.read_csv(\"df_PCA_llm.csv\",sep=\",\",header=0)\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "#useful functions\n",
+        "\n",
+        "def df_encoding(df):\n",
+        "    \"\"\"Replace coded categorical values with human-readable labels.\n",
+        "\n",
+        "    The columns ExerciseAngina, FastingBS, Sex, ChestPainType, RestingECG and\n",
+        "    ST_Slope are rewritten in `df` itself; values without an entry in the\n",
+        "    corresponding mapping are left untouched.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    df : pandas.DataFrame\n",
+        "        Table containing the six columns listed above.\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    pandas.DataFrame\n",
+        "        The same `df` object, after the replacement.\n",
+        "\n",
+        "    Raises\n",
+        "    ------\n",
+        "    KeyError\n",
+        "        If one of the six columns is missing.\n",
+        "    \"\"\"\n",
+        "    value_labels = {\n",
+        "        'ExerciseAngina': {\n",
+        "            'N': 'No',\n",
+        "            'Y': 'exercise-induced angina',\n",
+        "        },\n",
+        "        'FastingBS': {\n",
+        "            0: 'Not Diabetic',\n",
+        "            1: 'High fasting blood sugar',\n",
+        "        },\n",
+        "        'Sex': {\n",
+        "            'M': 'Man',\n",
+        "            'F': 'Female',\n",
+        "        },\n",
+        "        'ChestPainType': {\n",
+        "            'ATA': 'Atypical',\n",
+        "            'NAP': 'Non-Anginal Pain',\n",
+        "            'ASY': 'Asymptomatic',\n",
+        "            'TA': 'Typical Angina',\n",
+        "        },\n",
+        "        'RestingECG': {\n",
+        "            'Normal': 'Normal',\n",
+        "            'ST': 'ST-T wave abnormality',\n",
+        "            'LVH': 'Probable left ventricular hypertrophy',\n",
+        "        },\n",
+        "        'ST_Slope': {\n",
+        "            'Up': 'Up',\n",
+        "            'Flat': 'Flat',\n",
+        "            'Down': 'Downsloping',\n",
+        "        },\n",
+        "    }\n",
+        "\n",
+        "    for column, mapping in value_labels.items():\n",
+        "        # Assign the result back to the column: `df.col.replace(..., inplace=True)`\n",
+        "        # acts on a temporary Series and does not update `df` once pandas\n",
+        "        # Copy-on-Write is active.\n",
+        "        df[column] = df[column].replace(mapping)\n",
+        "\n",
+        "    return df\n",
+        "\n",
+        "\n",
+        "\n",
+        "def compile_text_no_target(x):\n",
+        "\n",
+        "\n",
+        "    text =  f\"\"\"Age: {x['Age']},\n",
+        "                Sex: {x['Sex']},\n",
+        "                Chest Pain Type: {x['ChestPainType']},\n",
+        "                RestingBP: {x['RestingBP']},\n",
+        "                Cholesterol: {x['Cholesterol']},\n",
+        "                FastingBS: {x['FastingBS']},\n",
+        "                RestingECG: {x['RestingECG']},\n",
+        "                MaxHR: {x['MaxHR']}\n",
+        "                Exercise Angina: {x['ExerciseAngina']},\n",
+        "                Old peak: {x['Oldpeak']},\n",
+        "                ST_Slope: {x['ST_Slope']}\n",
+        "                \"\"\"\n",
+        "\n",
+        "    return text\n",
+        "\n",
+        "def LLM_transform(df , model = llm_model):\n",
+        "    \"\"\"Embed every row of `df` with a sentence-embedding model.\n",
+        "\n",
+        "    Each row is turned into text by `compile_text_no_target`, the texts are\n",
+        "    encoded by `model` (normalized embeddings, progress bar shown) and the\n",
+        "    encoder output is returned wrapped in a new DataFrame, one row per input row.\n",
+        "    \"\"\"\n",
+        "    row_texts = df.apply(compile_text_no_target, axis=1).tolist()\n",
+        "\n",
+        "    embeddings = model.encode(\n",
+        "        sentences=row_texts,\n",
+        "        show_progress_bar=True,\n",
+        "        normalize_embeddings=True,\n",
+        "    )\n",
+        "\n",
+        "    return pd.DataFrame(embeddings)\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "def upload_ecg(file):\n",
+        "    \"\"\"Store an uploaded ECG file as the only file of the 'current_ecg' folder.\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    file : object\n",
+        "        Upload handle whose `name` attribute is the path of the temporary\n",
+        "        file holding the upload.\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    str\n",
+        "        Confirmation message for the UI.\n",
+        "\n",
+        "    Raises\n",
+        "    ------\n",
+        "    Exception\n",
+        "        Whatever `pd.read_csv` raises when the upload cannot be parsed as CSV,\n",
+        "        or `shutil.copy` raises when the file cannot be copied.\n",
+        "    \"\"\"\n",
+        "    destination_directory = 'current_ecg'\n",
+        "\n",
+        "    # Parse the upload first: an unreadable file must fail here, before the\n",
+        "    # previously stored ECG is deleted. The parsed frame itself is not needed.\n",
+        "    pd.read_csv(file.name, header=None)  # file.name is the temporary path of the upload\n",
+        "\n",
+        "    # The folder may not exist yet (e.g. on a fresh checkout).\n",
+        "    os.makedirs(destination_directory, exist_ok=True)\n",
+        "\n",
+        "    # Best-effort removal of the files already stored: a failure is reported\n",
+        "    # on stdout and the upload still goes on.\n",
+        "    try:\n",
+        "        stored_entries = os.listdir(destination_directory)\n",
+        "        if stored_entries:\n",
+        "            for filename in stored_entries:\n",
+        "                file_path = os.path.join(destination_directory, filename)\n",
+        "                if os.path.isfile(file_path):\n",
+        "                    os.remove(file_path)\n",
+        "            print(f\"I file nella cartella 'current_ecg' sono stati eliminati.\")\n",
+        "    except OSError as e:\n",
+        "        print(f\"Errore nell'eliminazione dei file: {str(e)}\")\n",
+        "\n",
+        "    # Copy the upload into the folder, keeping its file name.\n",
+        "    destination_file_path = os.path.join(destination_directory, os.path.basename(file.name))\n",
+        "    shutil.copy(file.name, destination_file_path)\n",
+        "\n",
+        "    return \"Your ECG is ready, you can analyze it!\"\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "def ecg_availability(patient_name):\n",
+        "    \"\"\"Tell whether a patient folder holds ECG files newer than the last recorded one.\n",
+        "\n",
+        "    The folder is PATIENT/<patient_name>. Its status.json may contain a\n",
+        "    \"last_datetime\" entry, and its CSV files are named after their timestamp;\n",
+        "    both use the format \"%B_%d_%H_%M_%S\".\n",
+        "\n",
+        "    Parameters\n",
+        "    ----------\n",
+        "    patient_name : str\n",
+        "        Name of the patient sub-folder.\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    str or None\n",
+        "        None when no patient is given or the folder has no status.json;\n",
+        "        otherwise a message saying whether a new ECG is available.\n",
+        "\n",
+        "    Raises\n",
+        "    ------\n",
+        "    ValueError\n",
+        "        If \"last_datetime\" in status.json does not match the timestamp format.\n",
+        "    \"\"\"\n",
+        "    # The format has no year, so every timestamp is parsed into the same\n",
+        "    # default year and comparisons are only meaningful within one year.\n",
+        "    timestamp_format = \"%B_%d_%H_%M_%S\"\n",
+        "\n",
+        "    # Guard against an empty/None selection, which cannot be joined into a path.\n",
+        "    if not patient_name:\n",
+        "        return None\n",
+        "\n",
+        "    folder_path = os.path.join(\"PATIENT\", patient_name)\n",
+        "    status_file_path = os.path.join(folder_path, \"status.json\")\n",
+        "\n",
+        "    # Without a status file there is nothing to report.\n",
+        "    if not os.path.isfile(status_file_path):\n",
+        "        return None\n",
+        "\n",
+        "    with open(status_file_path, 'r') as status_file:\n",
+        "        status_data = json.load(status_file)\n",
+        "\n",
+        "    # No recorded datetime: report a new ECG without inspecting the folder.\n",
+        "    last_datetime_str = status_data.get(\"last_datetime\", None)\n",
+        "    if last_datetime_str is None:\n",
+        "        return \"New ECG available\"\n",
+        "\n",
+        "    last_datetime = datetime.strptime(last_datetime_str, timestamp_format)\n",
+        "\n",
+        "    # Look for at least one CSV whose timestamp follows the recorded one.\n",
+        "    has_newer_ecg = False\n",
+        "    for csv_file in os.listdir(folder_path):\n",
+        "        if not csv_file.endswith(\".csv\"):\n",
+        "            continue\n",
+        "        try:\n",
+        "            csv_datetime = datetime.strptime(csv_file.split('.')[0], timestamp_format)\n",
+        "        except ValueError:\n",
+        "            # A CSV not named after a timestamp is not an ECG recording: skip it.\n",
+        "            continue\n",
+        "        if csv_datetime > last_datetime:\n",
+        "            has_newer_ecg = True\n",
+        "            break\n",
+        "\n",
+        "    if has_newer_ecg:\n",
+        "        return f\"New ECG available (last ECG: {last_datetime})\"\n",
+        "\n",
+        "    return f\"No ECG available (last ECG: {last_datetime})\"\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "def _latent_space_figure(encoded_row):\n",
+        "    \"\"\"Build the plotly scatter of the 2D latent space with one extra marker.\n",
+        "\n",
+        "    The background points come from the module-level `df_compressed`; the\n",
+        "    marker is `encoded_row` projected with the module-level `pca`.\n",
+        "    \"\"\"\n",
+        "    # NOTE(review): the keys of color_discrete_map (0, 1) and of labels ('Target')\n",
+        "    # do not match the values/name of the 'target' column (\"normal\" /\n",
+        "    # \"heart disease\"), so they look ineffective - confirm the intended colors.\n",
+        "    fig = px.scatter(df_compressed, x='Feature1', y='Feature2', color='target', color_discrete_map={0: 'red', 1: 'blue'},\n",
+        "                     labels={'Target': 'Binary Target'},size_max=18)\n",
+        "\n",
+        "    fig.update_layout(\n",
+        "        title='Latent space 2D (PCA reduction)',\n",
+        "        xaxis_title='component 1',\n",
+        "        yaxis_title='component 2'\n",
+        "    )\n",
+        "\n",
+        "    # Project the new heartbeat and draw it as a black star.\n",
+        "    new_point_compressed = pca.transform(encoded_row.reshape(1,-1))\n",
+        "    fig.add_trace(go.Scatter(\n",
+        "        x=new_point_compressed[:, 0],\n",
+        "        y=new_point_compressed[:, 1],\n",
+        "        mode='markers',\n",
+        "        marker=dict(symbol='star', color='black', size=15),\n",
+        "        name='actual patient'\n",
+        "    ))\n",
+        "\n",
+        "    # Rebuild the figure with the first trace typed as plain 'scatter'.\n",
+        "    # NOTE(review): presumably a rendering workaround - confirm why only trace 0.\n",
+        "    d = fig.to_dict()\n",
+        "    d[\"data\"][0][\"type\"] = \"scatter\"\n",
+        "\n",
+        "    return go.Figure(d)\n",
+        "\n",
+        "\n",
+        "def _reconstruction_figure(signal, reconstruction):\n",
+        "    \"\"\"Plot an input heartbeat, its reconstruction and the area between them.\"\"\"\n",
+        "    fig_reconstruction = plt.figure(figsize=(10,8))\n",
+        "    # These seaborn settings are global: they also affect figures created later.\n",
+        "    sns.set(font_scale = 2)\n",
+        "    sns.set_style(\"white\")\n",
+        "    plt.plot(signal, 'black',linewidth=2)\n",
+        "    plt.plot(reconstruction, 'red',linewidth=2)\n",
+        "    plt.fill_between(np.arange(len(reconstruction)), reconstruction, signal, color='lightcoral')\n",
+        "    plt.legend(labels=[\"Input\", \"Reconstruction\", \"Error\"])\n",
+        "\n",
+        "    return fig_reconstruction\n",
+        "\n",
+        "\n",
+        "def _decision_tree_figure(df_tree):\n",
+        "    \"\"\"Score the patients with the decision tree and plot the first one on the probability map.\n",
+        "\n",
+        "    `df_tree` holds the raw 'ST_Slope' and 'ChestPainType' columns. Returns the\n",
+        "    matplotlib figure and the array of class-1 probabilities (one per row).\n",
+        "    \"\"\"\n",
+        "    # Position of every category on the axes of the map.\n",
+        "    slope_codes = {'Up': 0, 'Flat': 1, 'Down': 2}\n",
+        "    chest_pain_codes = {'ASY': 0, 'ATA': 1, 'NAP': 2, 'TA': 3}\n",
+        "\n",
+        "    # The model wants the one-hot columns in this exact order.\n",
+        "    encoded_features = ['ST_Slope_Up', 'ST_Slope_Flat', 'ST_Slope_Down', 'ChestPainType_ASY', 'ChestPainType_ATA', 'ChestPainType_NAP', 'ChestPainType_TA']\n",
+        "\n",
+        "    # Vectorized one-hot encoding (the former cell-by-cell chained assignment\n",
+        "    # does not write into the frame once pandas Copy-on-Write is active).\n",
+        "    one_hot = {}\n",
+        "    for value in slope_codes:\n",
+        "        one_hot[f\"ST_Slope_{value}\"] = (df_tree['ST_Slope'] == value).astype(int)\n",
+        "    for value in chest_pain_codes:\n",
+        "        one_hot[f\"ChestPainType_{value}\"] = (df_tree['ChestPainType'] == value).astype(int)\n",
+        "    X_plot = pd.DataFrame(one_hot)[encoded_features]\n",
+        "\n",
+        "    #model prediction\n",
+        "    y_score = decision_tree.predict_proba(X_plot)[:,1]\n",
+        "\n",
+        "    # Map coordinates of the first patient, taken from that row itself so an\n",
+        "    # unknown category in another row cannot shift them.\n",
+        "    slope_position = df_tree['ST_Slope'].map(slope_codes).iloc[:1]\n",
+        "    chest_pain_position = df_tree['ChestPainType'].map(chest_pain_codes).iloc[:1]\n",
+        "\n",
+        "    # Create a structured grid\n",
+        "    fig_tree = plt.figure()\n",
+        "    x1 = np.linspace(df_plot['ST_Slope'].min()-0.5, df_plot['ST_Slope'].max()+0.5)\n",
+        "    x2 = np.linspace(df_plot['ChestPainType'].min()-0.5, df_plot['ChestPainType'].max()+0.5)\n",
+        "    X1, X2 = np.meshgrid(x1, x2)\n",
+        "\n",
+        "    # Interpolate the 'Prob' values onto the grid\n",
+        "    points = df_plot[['ST_Slope', 'ChestPainType']].values\n",
+        "    values = df_plot['Prob'].values\n",
+        "    Z = griddata(points, values, (X1, X2), method='nearest')\n",
+        "\n",
+        "    # Create the contour plot with regions colored by interpolated 'Prob'\n",
+        "    plt.contourf(X1, X2, Z, cmap='coolwarm', levels=10)\n",
+        "    plt.colorbar(label='Predicted Probability')\n",
+        "\n",
+        "    # Mark the first patient (a fixed color is used, so no colormap is needed).\n",
+        "    plt.scatter(slope_position, chest_pain_position, c=\"k\", edgecolor='k', marker='o', label=f'prob={y_score[:1].round(3)}')\n",
+        "\n",
+        "    # Remove the numerical labels from the x and y axes\n",
+        "    plt.xticks([])\n",
+        "    plt.yticks([])\n",
+        "\n",
+        "    # Write the category names next to the axes instead\n",
+        "    plt.text(0.0, -0.7, \"Up\", ha='center',fontsize=15)\n",
+        "    plt.text(1.00, -0.7, \"Flat\", ha='center',fontsize=15)\n",
+        "    plt.text(2.00, -0.7, \"Down\", ha='center',fontsize=15)\n",
+        "    plt.text(-0.62, 0.0, \"ASY\", rotation='vertical', va='center',fontsize=15)\n",
+        "    plt.text(-0.62, 1.00, \"ATA\", rotation='vertical', va='center',fontsize=15)\n",
+        "    plt.text(-0.62, 2.0, \"NAP\", rotation='vertical', va='center',fontsize=15)\n",
+        "    plt.text(-0.62, 3.0, \"TA\", rotation='vertical', va='center',fontsize=15)\n",
+        "\n",
+        "    # Add axis titles\n",
+        "    plt.xlabel('ST_Slope', fontsize=15, labelpad=20)\n",
+        "    plt.ylabel('ChestPainType', fontsize=15, labelpad=20)\n",
+        "\n",
+        "    return fig_tree, y_score\n",
+        "\n",
+        "\n",
+        "def _llm_cluster_figure(df_llm):\n",
+        "    \"\"\"Plot the reference LLM clusters and the uploaded patients in the 2D PCA space.\"\"\"\n",
+        "    # Note: df_encoding rewrites df_llm itself.\n",
+        "    df_llm_encoding = df_encoding(df_llm)\n",
+        "    df_point_LLM = LLM_transform(df_llm_encoding)\n",
+        "\n",
+        "    df_point_LLM.columns = [str(column) for column in df_point_LLM.columns]\n",
+        "\n",
+        "    # NOTE(review): assumes transform() returns a DataFrame (its columns are renamed) - confirm.\n",
+        "    pca_llm_point = pca_2d_llm_clusters.transform(df_point_LLM)\n",
+        "    pca_llm_point.columns = [\"comp1\", \"comp2\"]\n",
+        "\n",
+        "    fig_llm_cluster = plt.figure()\n",
+        "    x = df_pca_llm['comp1']\n",
+        "    y = df_pca_llm['comp2']\n",
+        "\n",
+        "    # Map every cluster id to a color\n",
+        "    color_mapping = {0: 'r', 1: 'b', 2: 'g', 3: 'y'}\n",
+        "\n",
+        "    for i in df_pca_llm['cluster'].unique():\n",
+        "        color = color_mapping.get(i, 'k')  # Use 'k' (black) for undefined values\n",
+        "        in_cluster = df_pca_llm['cluster'] == i\n",
+        "        plt.scatter(x[in_cluster], y[in_cluster], c=color, label=f\"Cluster {i}\")\n",
+        "\n",
+        "    # Uploaded patients: first component on x, second component on y.\n",
+        "    plt.scatter(pca_llm_point['comp1'], pca_llm_point['comp2'], c='k', marker='D')\n",
+        "\n",
+        "    # Remove the numerical labels from the x and y axes\n",
+        "    plt.xticks([])\n",
+        "    plt.yticks([])\n",
+        "\n",
+        "    plt.xlabel('Principal Component 1')\n",
+        "    plt.ylabel('Principal Component 2')\n",
+        "    plt.legend()\n",
+        "    plt.grid(False)\n",
+        "\n",
+        "    return fig_llm_cluster\n",
+        "\n",
+        "\n",
+        "def ecg_analysis():\n",
+        "    \"\"\"Analyze the ECG file stored in 'current_ecg' with the three models.\n",
+        "\n",
+        "    The file is read as a CSV with a header row; it must provide the columns\n",
+        "    '0'..'139' (ECG samples) and the clinical columns used by the decision\n",
+        "    tree and by the LLM embedding. Plots and probabilities refer to the first\n",
+        "    row, except the LLM cluster plot which shows every row.\n",
+        "\n",
+        "    Returns\n",
+        "    -------\n",
+        "    tuple\n",
+        "        (latent space plotly figure, reconstruction matplotlib figure,\n",
+        "        classifier probability text, decision tree matplotlib figure,\n",
+        "        decision tree probability text, LLM cluster matplotlib figure).\n",
+        "\n",
+        "    Raises\n",
+        "    ------\n",
+        "    gr.Error\n",
+        "        If no ECG has been uploaded yet.\n",
+        "    \"\"\"\n",
+        "    ecg_directory = \"current_ecg\"\n",
+        "    stored_files = os.listdir(ecg_directory) if os.path.isdir(ecg_directory) else []\n",
+        "    if not stored_files:\n",
+        "        # Shown to the user in the UI instead of a bare IndexError/FileNotFoundError.\n",
+        "        raise gr.Error(\"No ECG available: upload an ECG file before starting the analysis.\")\n",
+        "\n",
+        "    df = pd.read_csv(os.path.join(ecg_directory, stored_files[0]))\n",
+        "\n",
+        "    # ECG samples, scaled with the training min/max. shape: (n_rows , 140)\n",
+        "    ecg_samples = df[[str(i) for i in range(140)]].values\n",
+        "    df_data = (ecg_samples - min_val) / (max_val - min_val)\n",
+        "    df_data = tf.cast(df_data, tf.float32)\n",
+        "\n",
+        "    df_tree = df[[\"ChestPainType\",\"ST_Slope\"]].copy() #dataset for decision tree\n",
+        "\n",
+        "    df_llm = df[[\"Age\",\"Sex\",\"ChestPainType\",\"RestingBP\",\"Cholesterol\",\"FastingBS\",\"RestingECG\",\"MaxHR\",\"ExerciseAngina\",\"Oldpeak\",\"ST_Slope\"]].copy() # dataset for LLM\n",
+        "\n",
+        "    # ----------------ECG ANALYSIS WITH AUTOENCODER-------------------------------\n",
+        "    heartbeat_encoder_preds = autoencoder.encoder(df_data).numpy() #encoder data representation\n",
+        "    heartbeat_decoder_preds = autoencoder.decoder(heartbeat_encoder_preds).numpy() #decoder data reconstruction\n",
+        "\n",
+        "    classification_res = classifier.predict(df_data)\n",
+        "\n",
+        "    print(\"shapes of: encoder preds, decoder preds, classification preds\\n\",heartbeat_encoder_preds.shape,heartbeat_decoder_preds.shape,classification_res.shape)\n",
+        "\n",
+        "    p_class_res = classification_res[0,:] # classification result of the first row\n",
+        "\n",
+        "    fig = _latent_space_figure(heartbeat_encoder_preds[0,:])\n",
+        "    fig_reconstruction = _reconstruction_figure(df_data[0], heartbeat_decoder_preds[0])\n",
+        "\n",
+        "    # ----------DECISION TREE ANALYSIS---------------------------------\n",
+        "    fig_tree, y_score = _decision_tree_figure(df_tree)\n",
+        "\n",
+        "    # ------------LLM ANALYSIS------------------------------------\n",
+        "    fig_llm_cluster = _llm_cluster_figure(df_llm)\n",
+        "\n",
+        "    return fig, fig_reconstruction , f\"Heart disease probability: {int(p_class_res[0]*100)} %\" , fig_tree , f\"Heart disease probability: {int(y_score[0]*100)} %\" , fig_llm_cluster\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "# Demo app: Gradio UI wiring the three callbacks defined above.\n",
+        "\n",
+        "with gr.Blocks(title=\"TIQUE - AI DEMO CAPABILITIES\") as demo:\n",
+        "\n",
+        "    # Page title (the heading and centering tags are properly closed).\n",
+        "    gr.Markdown(\"<h1><center>TIQUE: AI DEMO CAPABILITIES</center></h1>\")\n",
+        "\n",
+        "\n",
+        "    # Patient selection: choosing a patient reports whether a new ECG is available.\n",
+        "    with gr.Row():\n",
+        "\n",
+        "        pazienti = [\"Elisabeth Smith\",\"Michael Mims\"]\n",
+        "        menu_pazienti = gr.Dropdown(choices=pazienti,label=\"patients\")\n",
+        "\n",
+        "        available_ecg_result = gr.Textbox()\n",
+        "\n",
+        "\n",
+        "        menu_pazienti.input(ecg_availability, inputs=[menu_pazienti], outputs=[available_ecg_result])\n",
+        "\n",
+        "    # ECG upload: the uploaded file is stored by upload_ecg.\n",
+        "    with gr.Row():\n",
+        "\n",
+        "        input_file = gr.UploadButton(\"Click to Upload an ECG 📁\")\n",
+        "        text_upload_results = gr.Textbox()\n",
+        "\n",
+        "        input_file.upload(upload_ecg,inputs=[input_file],outputs=text_upload_results)\n",
+        "\n",
+        "    with gr.Row():\n",
+        "        ecg_start_analysis_button = gr.Button(value=\"Start ECG analysis\",scale=1)\n",
+        "\n",
+        "\n",
+        "    gr.Markdown(\"## Large Language Model clustering\")\n",
+        "\n",
+        "    with gr.Row():\n",
+        "\n",
+        "        llm_cluster = gr.Plot()\n",
+        "\n",
+        "\n",
+        "    gr.Markdown(\"## Autoencoder results:\")\n",
+        "\n",
+        "    with gr.Row():\n",
+        "\n",
+        "        with gr.Column():\n",
+        "\n",
+        "            latent_space_representation = gr.Plot()\n",
+        "\n",
+        "        with gr.Column():\n",
+        "\n",
+        "            autoencoder_ecg_reconstruction = gr.Plot()\n",
+        "\n",
+        "            classifier_nn_prediction = gr.Textbox()\n",
+        "\n",
+        "    gr.Markdown(\"## Decision Tree results:\")\n",
+        "\n",
+        "    with gr.Row():\n",
+        "\n",
+        "        decision_tree_plot = gr.Plot()\n",
+        "\n",
+        "        decision_tree_proba = gr.Textbox()\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "    # The outputs are listed in the same order as the values returned by ecg_analysis.\n",
+        "    ecg_start_analysis_button.click(fn=ecg_analysis, inputs=None, outputs=[latent_space_representation,\n",
+        "                                                                            autoencoder_ecg_reconstruction,\n",
+        "                                                                            classifier_nn_prediction,decision_tree_plot, decision_tree_proba,\n",
+        "                                                                           llm_cluster])\n",
+        "if __name__ == \"__main__\":\n",
+        "    demo.launch()\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n",
+        "\n"
+      ],
+      "metadata": {
+        "id": "bVSujh5-677-"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}