|
import matplotlib.pyplot as plt |
|
import numpy as np |
|
import pandas as pd |
|
import os |
|
import tensorflow as tf |
|
from tensorflow import keras |
|
import seaborn as sns |
|
|
|
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score |
|
from sklearn.metrics import f1_score, confusion_matrix, precision_recall_curve, roc_curve |
|
from sklearn.metrics import ConfusionMatrixDisplay |
|
|
|
from sklearn.model_selection import train_test_split |
|
from tensorflow.keras import layers, losses |
|
from tensorflow.keras.datasets import fashion_mnist |
|
from tensorflow.keras.models import Model |
|
|
|
from plotly.subplots import make_subplots |
|
import plotly.graph_objects as go |
|
|
|
from sklearn.decomposition import PCA |
|
|
|
import plotly.express as px |
|
from scipy.interpolate import griddata |
|
import sklearn |
|
from sklearn.tree import DecisionTreeClassifier |
|
from sklearn.metrics import confusion_matrix, precision_score, roc_auc_score, precision_recall_curve |
|
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, cross_val_predict, StratifiedKFold |
|
from sentence_transformers import SentenceTransformer |
|
|
|
from sklearn import tree |
|
|
|
|
|
import gradio as gr |
|
import os |
|
import json |
|
from datetime import datetime, timedelta |
|
import shutil |
|
import random |
|
import plotly.io as pio |
|
|
|
import joblib |
|
|
|
|
|
|
|
|
|
# --- Pre-trained artefacts ----------------------------------------------------
# Everything below is loaded once at import time and shared by the callbacks.

# Keras models used by the ECG branch of the demo.
autoencoder = keras.models.load_model("models/autoencoder")
classifier = keras.models.load_model("models/classifier")

# Objects restored with joblib. joblib.load unpickles its input, so these
# files must come from a trusted source.
decision_tree = joblib.load("models/decision_tree_model.pkl")
pca_2d_llm_clusters = joblib.load("models/pca_llm_model.pkl")

# Sentence-embedding model used to embed the textual patient description.
llm_model = SentenceTransformer(r"sentence-transformers/paraphrase-MiniLM-L6-v2")

print("models loaded")
|
|
|
|
|
|
|
|
|
|
|
# --- Reference ECG dataset ----------------------------------------------------
# ecg.csv has no header row: columns 0..139 are the signal samples and column
# 140 is the label.
dataframe = pd.read_csv("ecg.csv", header=None)

# Flip the label: an original 0 becomes 1, every other value becomes 0.
dataframe[140] = dataframe[140].apply(lambda value: int(value == 0))

df_ecg = dataframe[list(range(140))]
ecg_raw_data = df_ecg.values
labels = dataframe.values[:, -1]
ecg_data = ecg_raw_data[:, :]

train_data, test_data, train_labels, test_labels = train_test_split(
    ecg_data,
    labels,
    test_size=0.2,
    random_state=21,
)

# Normalisation constants come from the training split only; ecg_analysis()
# reuses them to scale uploaded signals.
min_val = tf.reduce_min(train_data)
max_val = tf.reduce_max(train_data)

print("constant computing: OK")

# Min-max scale the whole dataset and cast to float32 before encoding it.
ecg_data = tf.cast((ecg_data - min_val) / (max_val - min_val), tf.float32)

print(ecg_data.shape)
X = autoencoder.encoder(ecg_data).numpy()

# Project the encoder output onto two principal components for plotting.
n_components = 2
pca = PCA(n_components=n_components)
X_compressed = pca.fit_transform(X)

column_names = [f"Feature{idx}" for idx in range(1, n_components + 1)]
categories = ["normal", "heart disease"]
target_categorical = pd.Categorical.from_codes(
    labels.astype(int), categories=categories
)
df_compressed = pd.DataFrame(X_compressed, columns=column_names)
df_compressed["target"] = target_categorical

print("PCA: done")
|
|
|
|
|
|
|
# Probability map used as the background of the decision-tree plot
# (ecg_analysis reads its 'ST_Slope', 'ChestPainType' and 'Prob' columns).
df_plot = pd.read_csv("df_mappa.csv", sep=",", header=0)
print("df map for decision tree loaded.")

# 2-D projection of the reference patients
# (ecg_analysis reads its 'comp1', 'comp2' and 'cluster' columns).
df_pca_llm = pd.read_csv("df_PCA_llm.csv", sep=",", header=0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def df_encoding(df):
    """Replace the coded clinical values of *df* with descriptive labels.

    The columns ``ExerciseAngina``, ``FastingBS``, ``Sex``, ``ChestPainType``,
    ``RestingECG`` and ``ST_Slope`` are rewritten; values that are not listed
    in a mapping are left untouched.

    Args:
        df: DataFrame containing the six columns above. It is modified in
            place.

    Returns:
        The same DataFrame object, for call chaining.

    Raises:
        KeyError: if one of the expected columns is missing.
    """
    value_labels = {
        "ExerciseAngina": {
            "N": "No",
            "Y": "exercise-induced angina",
        },
        "FastingBS": {
            0: "Not Diabetic",
            1: "High fasting blood sugar",
        },
        "Sex": {
            "M": "Man",
            "F": "Female",
        },
        "ChestPainType": {
            "ATA": "Atypical",
            "NAP": "Non-Anginal Pain",
            "ASY": "Asymptomatic",
            "TA": "Typical Angina",
        },
        "RestingECG": {
            "Normal": "Normal",
            "ST": "ST-T wave abnormality",
            "LVH": "Probable left ventricular hypertrophy",
        },
        "ST_Slope": {
            "Up": "Up",
            "Flat": "Flat",
            "Down": "Downsloping",
        },
    }

    # Assign the replaced column back explicitly. The previous
    # `df.Column.replace(..., inplace=True)` form is a chained in-place update:
    # it is deprecated in pandas 2.x and does not modify `df` at all once
    # Copy-on-Write is active (the pandas 3 default).
    for column, mapping in value_labels.items():
        df[column] = df[column].replace(mapping)

    return df
|
|
|
|
|
|
|
def compile_text_no_target(x):
    """Render one patient record as a multi-line "Label: value" description.

    Args:
        x: Mapping-like record (for example a DataFrame row) providing the
            keys 'Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol',
            'FastingBS', 'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak'
            and 'ST_Slope'.

    Returns:
        The description, one field per line, ending with a newline.
    """
    # NOTE: the 'MaxHR' and 'ST_Slope' lines deliberately carry no trailing
    # comma, exactly as in the original template; the text feeds the embedding
    # model, so its content must not drift.
    lines = [
        f"Age: {x['Age']},",
        f"Sex: {x['Sex']},",
        f"Chest Pain Type: {x['ChestPainType']},",
        f"RestingBP: {x['RestingBP']},",
        f"Cholesterol: {x['Cholesterol']},",
        f"FastingBS: {x['FastingBS']},",
        f"RestingECG: {x['RestingECG']},",
        f"MaxHR: {x['MaxHR']}",
        f"Exercise Angina: {x['ExerciseAngina']},",
        f"Old peak: {x['Oldpeak']},",
        f"ST_Slope: {x['ST_Slope']}",
        "",  # the template ends with a line break
    ]
    return "\n".join(lines)
|
|
|
def LLM_transform(df , model = llm_model):
    """Embed every row of *df* with a sentence-embedding model.

    Each row is first rendered to text with ``compile_text_no_target`` and the
    resulting sentences are encoded in one call.

    Args:
        df: DataFrame whose rows provide the fields read by
            ``compile_text_no_target``.
        model: Object exposing ``encode(sentences=..., show_progress_bar=...,
            normalize_embeddings=...)``; defaults to the module-level
            ``llm_model``.

    Returns:
        A DataFrame built from the output of ``model.encode``.
    """
    row_texts = df.apply(compile_text_no_target, axis=1).tolist()

    embeddings = model.encode(
        sentences=row_texts,
        show_progress_bar=True,
        normalize_embeddings=True,
    )

    return pd.DataFrame(embeddings)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def upload_ecg(file):
    """Store an uploaded patient file as the single entry of ``current_ecg``.

    The upload is parsed first so that an unreadable file raises before the
    previously stored ECG is removed. The destination folder is created when
    missing, any regular file already in it is deleted, and the upload is
    copied there under its own base name.

    Args:
        file: Upload handle whose ``name`` attribute is the path of the
            uploaded file on disk.

    Returns:
        A confirmation message for the UI.

    Raises:
        Exception: whatever ``pandas.read_csv`` or ``shutil.copy`` raise for
            an unreadable or uncopyable file.
    """
    destination_directory = "current_ecg"
    source_file_path = file.name

    # Validation only: the parsed frame itself is not needed here.
    pd.read_csv(source_file_path, header=None)

    # A fresh checkout may not have the folder yet.
    os.makedirs(destination_directory, exist_ok=True)

    stale_entries = os.listdir(destination_directory)
    if stale_entries:
        try:
            for filename in stale_entries:
                file_path = os.path.join(destination_directory, filename)
                if os.path.isfile(file_path):
                    os.remove(file_path)
            print(f"I file nella cartella 'current_ecg' sono stati eliminati.")
        except OSError as e:
            # Best effort: a file that cannot be removed must not block the
            # upload, but the failure is reported.
            print(f"Errore nell'eliminazione dei file: {str(e)}")

    # Copy straight from the uploaded path; rebuilding it from dirname and
    # basename produced "/<name>" for paths without a directory component.
    destination_file_path = os.path.join(
        destination_directory, os.path.basename(source_file_path)
    )
    shutil.copy(source_file_path, destination_file_path)

    return "Your ECG is ready, you can analyze it!"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def ecg_availability(patient_name):
    """Tell whether a patient has an ECG newer than the last recorded one.

    The patient's folder is ``PATIENT/<patient_name>``. Its ``status.json``
    may hold a ``last_datetime`` entry formatted as ``%B_%d_%H_%M_%S``; the
    CSV files in the folder are named with the same format.

    Args:
        patient_name: Name of the patient's folder. ``None`` or an empty
            string (nothing selected) yields ``None``.

    Returns:
        ``None`` when no patient is given or the folder has no
        ``status.json``; otherwise a status message for the UI.
    """
    if not patient_name:
        return None

    folder_path = os.path.join("PATIENT", patient_name)
    status_file_path = os.path.join(folder_path, "status.json")

    if not os.path.isfile(status_file_path):
        return None

    with open(status_file_path, "r") as status_file:
        status_data = json.load(status_file)

    last_datetime_str = status_data.get("last_datetime", None)
    if last_datetime_str is None:
        # No ECG has been recorded as processed yet.
        return "New ECG available"

    timestamp_format = "%B_%d_%H_%M_%S"
    last_datetime = datetime.strptime(last_datetime_str, timestamp_format)

    newer_ecg_found = False
    for entry in os.listdir(folder_path):
        if not entry.endswith(".csv"):
            continue
        try:
            recorded_at = datetime.strptime(entry.split(".")[0], timestamp_format)
        except ValueError:
            # A CSV that is not named after a timestamp is not an ECG
            # recording; ignore it instead of failing the whole lookup.
            continue
        if recorded_at > last_datetime:
            newer_ecg_found = True
            break

    # The original tested an undefined name here (`successive_csv_file`),
    # which raised NameError on every call that reached this point.
    if newer_ecg_found:
        return f"New ECG available (last ECG: {last_datetime})"

    return f"No ECG available (last ECG: {last_datetime})"
|
|
|
|
|
|
|
|
|
# Number of samples in one ECG trace (columns "0".."139" of the uploaded CSV).
_ECG_LENGTH = 140

# One-hot feature names, in the order in which they are passed to the tree.
_TREE_FEATURES = [
    "ST_Slope_Up",
    "ST_Slope_Flat",
    "ST_Slope_Down",
    "ChestPainType_ASY",
    "ChestPainType_ATA",
    "ChestPainType_NAP",
    "ChestPainType_TA",
]

# Axis positions of each category on the decision-tree probability map.
_SLOPE_CODES = {"Up": 0, "Flat": 1, "Down": 2}
_CHEST_PAIN_CODES = {"ASY": 0, "ATA": 1, "NAP": 2, "TA": 3}

# Clinical columns rendered to text and embedded for the cluster plot.
_LLM_COLUMNS = [
    "Age",
    "Sex",
    "ChestPainType",
    "RestingBP",
    "Cholesterol",
    "FastingBS",
    "RestingECG",
    "MaxHR",
    "ExerciseAngina",
    "Oldpeak",
    "ST_Slope",
]


def _load_current_ecg():
    """Read the first file found in ``current_ecg`` as a DataFrame."""
    try:
        uploaded = os.listdir("current_ecg")
    except FileNotFoundError:
        uploaded = []
    if not uploaded:
        # Surface a readable message in the UI instead of a bare IndexError.
        raise gr.Error("No ECG available: upload the patient's file first.")
    return pd.read_csv(os.path.join("current_ecg", uploaded[0]))


def _latent_space_figure(patient_code):
    """Plot the reference latent space with the patient drawn as a star."""
    # NOTE(review): the colour-map keys (0/1) and the label key ('Target') do
    # not match the 'target' column, which holds the categories "normal" and
    # "heart disease"; Plotly therefore appears to fall back to its default
    # colours. Left unchanged to keep the current rendering - confirm intent.
    fig = px.scatter(
        df_compressed,
        x="Feature1",
        y="Feature2",
        color="target",
        color_discrete_map={0: "red", 1: "blue"},
        labels={"Target": "Binary Target"},
        size_max=18,
    )
    fig.update_layout(
        xaxis_title="component 1",
        yaxis_title="component 2",
    )

    # Project the patient's encoding with the PCA fitted on the reference set.
    patient_xy = pca.transform(patient_code.reshape(1, -1))[0]
    fig.add_trace(
        go.Scatter(
            x=[patient_xy[0]],
            y=[patient_xy[1]],
            mode="markers",
            marker=dict(symbol="star", color="black", size=15),
            name="actual patient",
        )
    )

    # Force the first trace to the plain 'scatter' type and rebuild the figure
    # (presumably to avoid a WebGL trace type in the browser - TODO confirm).
    as_dict = fig.to_dict()
    as_dict["data"][0]["type"] = "scatter"
    return go.Figure(as_dict)


def _reconstruction_figure(signal, reconstruction):
    """Overlay the input ECG, its reconstruction and the area between them."""
    samples = np.arange(_ECG_LENGTH)

    fig_reconstruction = go.Figure()
    fig_reconstruction.add_trace(
        go.Scatter(
            x=samples,
            y=signal,
            fill=None,
            mode="lines",
            name="Input",
            line=dict(color="black", width=3),
        )
    )
    fig_reconstruction.add_trace(
        go.Scatter(
            x=samples,
            y=reconstruction,
            fill=None,
            mode="lines",
            name="Reconstruction",
            line=dict(color="red", width=3),
        )
    )

    # Closed outline: along the reconstruction, then back along the input.
    fill_x = list(samples) + list(samples[::-1])
    fill_y = list(reconstruction) + list(signal[::-1])
    # NOTE(review): the alpha of 'fillcolor' (10.0) lies outside the 0-1
    # range; kept byte-for-byte to preserve the current rendering.
    fig_reconstruction.add_trace(
        go.Scatter(
            x=fill_x,
            y=fill_y,
            fill="tozeroy",
            fillcolor="rgba(255, 182, 193, 10.0)",
            mode="lines",
            line=dict(color="rgba(255, 182, 193, 0.5)", width=0),
            name="Error",
        )
    )

    fig_reconstruction.update_layout(legend=dict(x=1.1, y=1.05))
    return fig_reconstruction


def _one_hot_tree_features(df_tree):
    """One-hot encode 'ST_Slope' and 'ChestPainType' for the decision tree."""
    encoded = {}
    for feature in _TREE_FEATURES:
        # "ST_Slope_Up" -> column "ST_Slope", level "Up".
        source_column, _, level = feature.rpartition("_")
        encoded[feature] = (df_tree[source_column] == level).to_numpy().astype(int)
    # Built column by column: the previous `X_plot[col][k] = 1` chained
    # assignment does not write through once pandas Copy-on-Write is active.
    return pd.DataFrame(encoded, columns=_TREE_FEATURES)


def _tree_figure(df_tree, y_score):
    """Draw the probability map of the tree and mark the patient on it."""
    # Global seaborn/matplotlib styling, as in the original implementation.
    sns.set(font_scale=2)
    sns.set_style("white")

    # Explicit Figure/Axes objects instead of pyplot's implicit "current
    # figure", so the drawing does not depend on global pyplot state.
    fig_tree, ax = plt.subplots()

    # df_plot is assumed to encode ST_Slope / ChestPainType with the same
    # integer codes as _SLOPE_CODES / _CHEST_PAIN_CODES - TODO confirm.
    x1 = np.linspace(df_plot["ST_Slope"].min() - 0.5, df_plot["ST_Slope"].max() + 0.5)
    x2 = np.linspace(
        df_plot["ChestPainType"].min() - 0.5, df_plot["ChestPainType"].max() + 0.5
    )
    X1, X2 = np.meshgrid(x1, x2)

    points = df_plot[["ST_Slope", "ChestPainType"]].values
    values = df_plot["Prob"].values
    Z = griddata(points, values, (X1, X2), method="nearest")

    surface = ax.contourf(X1, X2, Z, cmap="coolwarm", levels=10)
    fig_tree.colorbar(surface, ax=ax, label="Predicted Probability")

    # Mark the patient (first row). Unknown categories have no position on
    # the map, so the marker is skipped rather than misplaced.
    slope_code = _SLOPE_CODES.get(df_tree["ST_Slope"].iloc[0])
    pain_code = _CHEST_PAIN_CODES.get(df_tree["ChestPainType"].iloc[0])
    if slope_code is not None and pain_code is not None:
        # No 'cmap' here: the marker has a fixed colour, so a colormap would
        # be ignored (and matplotlib warns about it).
        ax.scatter(
            [slope_code],
            [pain_code],
            c="k",
            edgecolor="k",
            marker="o",
            label=f"prob={y_score[:1].round(3)}",
        )

    # Replace numeric ticks with the category names.
    ax.set_xticks([])
    ax.set_yticks([])
    for x_pos, name in ((0.0, "Up"), (1.0, "Flat"), (2.0, "Down")):
        ax.text(x_pos, -0.7, name, ha="center", fontsize=15)
    for y_pos, name in ((0.0, "ASY"), (1.0, "ATA"), (2.0, "NAP"), (3.0, "TA")):
        ax.text(-0.62, y_pos, name, rotation="vertical", va="center", fontsize=15)

    ax.set_xlabel("ST_Slope", fontsize=15, labelpad=20)
    ax.set_ylabel("ChestPainType", fontsize=15, labelpad=20)
    return fig_tree


def _llm_cluster_figure(df_llm):
    """Plot the reference clusters and the embedded patient record(s)."""
    df_point_LLM = LLM_transform(df_encoding(df_llm))
    # The projection model is fed string column names, as before.
    df_point_LLM.columns = [str(column) for column in df_point_LLM.columns]

    # Wrapping in DataFrame keeps the next line valid whether transform()
    # returns a DataFrame or a plain array.
    pca_llm_point = pd.DataFrame(pca_2d_llm_clusters.transform(df_point_LLM))
    pca_llm_point.columns = ["comp1", "comp2"]

    fig_llm_cluster = go.Figure()
    for cluster in df_pca_llm["cluster"].unique():
        cluster_data = df_pca_llm[df_pca_llm["cluster"] == cluster]
        fig_llm_cluster.add_trace(
            go.Scatter(
                x=cluster_data["comp1"],
                y=cluster_data["comp2"],
                mode="markers",
                name=f"Cluster {cluster}",
            )
        )
    fig_llm_cluster.update_traces(marker=dict(size=12))

    fig_llm_cluster.update_xaxes(title_text="Principal Component 1")
    fig_llm_cluster.update_yaxes(title_text="Principal Component 2")

    # Added after update_traces so the patient keeps its own marker style.
    fig_llm_cluster.add_trace(
        go.Scatter(
            x=pca_llm_point["comp1"],
            y=pca_llm_point["comp2"],
            mode="markers",
            name="Patient",
            marker=dict(size=12, symbol="diamond", line=dict(width=2, color="Black")),
        )
    )

    fig_llm_cluster.update_layout(legend=dict(x=1.05, y=1))
    fig_llm_cluster.update_xaxes(showgrid=False)
    fig_llm_cluster.update_yaxes(showgrid=False)
    return fig_llm_cluster


def ecg_analysis():
    """Analyse the uploaded patient file and build every output of the demo.

    The file stored in ``current_ecg`` must provide the ECG samples in
    columns "0".."139" plus the clinical columns listed in ``_LLM_COLUMNS``.
    Only the first row is used for the ECG plots and the probabilities.

    Returns:
        A 6-tuple matching the Gradio outputs: latent-space figure,
        reconstruction figure, classifier probability text, decision-tree
        figure, decision-tree probability text, cluster figure.

    Raises:
        gr.Error: if no file has been uploaded yet.
    """
    df = _load_current_ecg()

    # Scale the signal with the constants computed on the training split.
    ecg_columns = [str(i) for i in range(_ECG_LENGTH)]
    df_data = (df[ecg_columns].values - min_val) / (max_val - min_val)
    df_data = tf.cast(df_data, tf.float32)

    heartbeat_encoder_preds = autoencoder.encoder(df_data).numpy()
    heartbeat_decoder_preds = autoencoder.decoder(heartbeat_encoder_preds).numpy()
    classification_res = classifier.predict(df_data)

    print(
        "shapes of: encoder preds, decoder preds, classification preds\n",
        heartbeat_encoder_preds.shape,
        heartbeat_decoder_preds.shape,
        classification_res.shape,
    )

    fig = _latent_space_figure(heartbeat_encoder_preds[0, :])
    fig_reconstruction = _reconstruction_figure(
        df_data[0].numpy(), heartbeat_decoder_preds[0]
    )

    df_tree = df[["ChestPainType", "ST_Slope"]]
    y_score = decision_tree.predict_proba(_one_hot_tree_features(df_tree))[:, 1]
    fig_tree = _tree_figure(df_tree, y_score)

    # Copy: df_encoding() rewrites the columns of the frame it receives.
    fig_llm_cluster = _llm_cluster_figure(df[_LLM_COLUMNS].copy())

    p_class_res = classification_res[0, :]
    return (
        fig,
        fig_reconstruction,
        f"Heart disease probability: {int(p_class_res[0]*100)} %",
        fig_tree,
        f"Heart disease probability: {int(y_score[0]*100)} %",
        fig_llm_cluster,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# --- Gradio user interface ----------------------------------------------------
# NOTE(review): the source lost its indentation, so the nesting of the widgets
# inside rows/columns below is reconstructed from the statement order.
with gr.Blocks(title="TIQUE - AI DEMO CAPABILITIES") as demo:

    # Closing tags fixed: the title previously ended with "<center><h1>".
    gr.Markdown("<h1><center>TIQUE: AI DEMO CAPABILITIES</center></h1>")

    # Patient selection and availability of a new ECG for that patient.
    with gr.Row():
        pazienti = ["Elisabeth Smith", "Michael Mims"]
        menu_pazienti = gr.Dropdown(choices=pazienti, label="patients")
        available_ecg_result = gr.Textbox()

    menu_pazienti.input(
        ecg_availability,
        inputs=[menu_pazienti],
        outputs=[available_ecg_result],
    )

    # Upload of the patient's file into the 'current_ecg' folder.
    with gr.Row():
        # NOTE(review): the trailing 'π' in the label looks like a mis-encoded
        # character; kept as found.
        input_file = gr.UploadButton("Upload patient's data and latest ECG π")
        text_upload_results = gr.Textbox()

    input_file.upload(upload_ecg, inputs=[input_file], outputs=text_upload_results)

    with gr.Row():
        ecg_start_analysis_button = gr.Button(value="Start data analysis", scale=1)

    gr.Markdown("## Patient positioning on clusters")

    with gr.Row():
        llm_cluster = gr.Plot()

    gr.Markdown("## ECG analysis:")

    with gr.Row():
        with gr.Column():
            latent_space_representation = gr.Plot()

        with gr.Column():
            autoencoder_ecg_reconstruction = gr.Plot()
            classifier_nn_prediction = gr.Textbox()

    gr.Markdown("## Patient's classification based on Chest Pain Type and ST Slope:")

    with gr.Row():
        decision_tree_plot = gr.Plot()
        decision_tree_proba = gr.Textbox()

    # The output order must match the tuple returned by ecg_analysis().
    ecg_start_analysis_button.click(
        fn=ecg_analysis,
        inputs=None,
        outputs=[
            latent_space_representation,
            autoencoder_ecg_reconstruction,
            classifier_nn_prediction,
            decision_tree_plot,
            decision_tree_proba,
            llm_cluster,
        ],
    )

if __name__ == "__main__":
    demo.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|