import matplotlib.pyplot as plt |
import numpy as np |
import pandas as pd |
import os |
import tensorflow as tf |
from tensorflow import keras |
import seaborn as sns |
from sklearn.metrics import accuracy_score, precision_score, recall_score, roc_auc_score |
from sklearn.metrics import f1_score, confusion_matrix, precision_recall_curve, roc_curve |
from sklearn.metrics import ConfusionMatrixDisplay |
from sklearn.model_selection import train_test_split |
from tensorflow.keras import layers, losses |
from tensorflow.keras.datasets import fashion_mnist |
from tensorflow.keras.models import Model |
from plotly.subplots import make_subplots |
import plotly.graph_objects as go |
from sklearn.decomposition import PCA |
import plotly.express as px |
from scipy.interpolate import griddata |
import sklearn |
from sklearn.tree import DecisionTreeClassifier |
from sklearn.metrics import confusion_matrix, precision_score, roc_auc_score, precision_recall_curve |
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, cross_val_predict, StratifiedKFold |
from sentence_transformers import SentenceTransformer |
from sklearn import tree |
import gradio as gr |
import os |
import json |
from datetime import datetime, timedelta |
import shutil |
import random |
import plotly.io as pio |
import joblib |
# ---------------------------------------------------------------------------
# Pre-trained models, loaded once at import time and shared by the callbacks.
# ---------------------------------------------------------------------------
autoencoder = keras.models.load_model("models/autoencoder")
classifier = keras.models.load_model("models/classifier")
decision_tree = joblib.load("models/decision_tree_model.pkl")
llm_model = SentenceTransformer("sentence-transformers/paraphrase-MiniLM-L6-v2")
pca_2d_llm_clusters = joblib.load("models/pca_llm_model.pkl")
print("models loaded")

# ---------------------------------------------------------------------------
# Reference ECG dataset: columns 0..139 are the signal samples.
# ---------------------------------------------------------------------------
dataframe = pd.read_csv("ecg.csv", header=None)
# Flip column 140: an original 0 becomes 1, every other value becomes 0.
dataframe[140] = dataframe[140].apply(lambda x: int(x == 0))
df_ecg = dataframe[list(range(140))]
ecg_raw_data = df_ecg.values
# Labels are read from the last column of the file.
labels = dataframe.values[:, -1]
ecg_data = ecg_raw_data[:, :]

# The split is only used to derive the scaling constants from the training part.
train_data, test_data, train_labels, test_labels = train_test_split(
    ecg_data,
    labels,
    test_size=0.2,
    random_state=21,
)
min_val = tf.reduce_min(train_data)
max_val = tf.reduce_max(train_data)
print("constant computing: OK")

# Min-max scale the whole dataset with the training constants.
ecg_data = tf.cast((ecg_data - min_val) / (max_val - min_val), tf.float32)
print(ecg_data.shape)

# ---------------------------------------------------------------------------
# 2-D PCA of the autoencoder's latent representation of the reference ECGs.
# ---------------------------------------------------------------------------
X = autoencoder.encoder(ecg_data).numpy()
n_components = 2
pca = PCA(n_components=n_components)
X_compressed = pca.fit_transform(X)
column_names = [f"Feature{i}" for i in range(1, n_components + 1)]
categories = ["normal", "heart disease"]
# Label code 0 maps to "normal" and code 1 to "heart disease".
target_categorical = pd.Categorical.from_codes(labels.astype(int), categories=categories)
df_compressed = pd.DataFrame(X_compressed, columns=column_names).assign(
    target=target_categorical
)
print("PCA: done")

# ---------------------------------------------------------------------------
# Pre-computed tables used as plot backgrounds by ecg_analysis().
# ---------------------------------------------------------------------------
df_plot = pd.read_csv("df_mappa.csv", sep=",", header=0)
print("df map for decision tree loaded.")
df_pca_llm = pd.read_csv("df_PCA_llm.csv", sep=",", header=0)
def df_encoding(df):
    """Replace coded categorical values with human-readable labels.

    The columns ``ExerciseAngina``, ``FastingBS``, ``Sex``, ``ChestPainType``,
    ``RestingECG`` and ``ST_Slope`` are rewritten on ``df`` itself; values that
    have no entry in the corresponding mapping are left untouched.

    Args:
        df: DataFrame containing the six columns listed above.

    Returns:
        The same ``df`` object, for call chaining.

    Raises:
        KeyError: if one of the expected columns is missing.
    """
    label_maps = {
        "ExerciseAngina": {
            "N": "No",
            "Y": "exercise-induced angina",
        },
        "FastingBS": {
            0: "Not Diabetic",
            1: "High fasting blood sugar",
        },
        "Sex": {
            "M": "Man",
            "F": "Female",
        },
        "ChestPainType": {
            "ATA": "Atypical",
            "NAP": "Non-Anginal Pain",
            "ASY": "Asymptomatic",
            "TA": "Typical Angina",
        },
        "RestingECG": {
            "Normal": "Normal",
            "ST": "ST-T wave abnormality",
            "LVH": "Probable left ventricular hypertrophy",
        },
        "ST_Slope": {
            "Up": "Up",
            "Flat": "Flat",
            "Down": "Downsloping",
        },
    }
    # Assign the replaced column back instead of `df.col.replace(..., inplace=True)`:
    # the chained in-place form acts on a temporary Series and no longer updates
    # `df` once pandas Copy-on-Write is active.
    for column, mapping in label_maps.items():
        df[column] = df[column].replace(mapping)
    return df
def compile_text_no_target(x):
    """Render one patient record as a multi-line ``label: value`` text.

    Args:
        x: mapping-like record (e.g. a DataFrame row) with the keys ``Age``,
            ``Sex``, ``ChestPainType``, ``RestingBP``, ``Cholesterol``,
            ``FastingBS``, ``RestingECG``, ``MaxHR``, ``ExerciseAngina``,
            ``Oldpeak`` and ``ST_Slope``.

    Returns:
        The text, one field per line, ending with a newline.
    """
    # (printed label, record key, separator written before the line break).
    # The MaxHR line and the last line carry no comma; this is kept as is because
    # the text is what gets embedded by the sentence model in LLM_transform.
    layout = (
        ("Age", "Age", ","),
        ("Sex", "Sex", ","),
        ("Chest Pain Type", "ChestPainType", ","),
        ("RestingBP", "RestingBP", ","),
        ("Cholesterol", "Cholesterol", ","),
        ("FastingBS", "FastingBS", ","),
        ("RestingECG", "RestingECG", ","),
        ("MaxHR", "MaxHR", ""),
        ("Exercise Angina", "ExerciseAngina", ","),
        ("Old peak", "Oldpeak", ","),
        ("ST_Slope", "ST_Slope", ""),
    )
    return "".join(f"{label}: {x[key]}{separator}\n" for label, key, separator in layout)
def LLM_transform(df, model=llm_model):
    """Embed every row of ``df`` with a sentence-embedding model.

    Each row is first turned into text by ``compile_text_no_target``; the
    resulting sentences are encoded in one call with normalised embeddings.

    Args:
        df: DataFrame whose rows provide the fields read by
            ``compile_text_no_target``.
        model: object exposing ``encode(sentences=..., show_progress_bar=...,
            normalize_embeddings=...)``; defaults to the module-level ``llm_model``.

    Returns:
        A DataFrame built from the encoder output (one row per input row).
    """
    row_texts = df.apply(compile_text_no_target, axis=1)
    embeddings = model.encode(
        sentences=row_texts.tolist(),
        show_progress_bar=True,
        normalize_embeddings=True,
    )
    return pd.DataFrame(embeddings)
def upload_ecg(file):
    """Store an uploaded CSV as the only file of the ``current_ecg`` folder.

    The upload is parsed first, so that an unreadable file is rejected before
    the previously stored ECG is discarded. The folder is created when missing,
    emptied (best effort), and the upload is copied into it under its base name.

    Args:
        file: the uploaded file, either an object exposing its path as
            ``.name`` or the path itself as a string.

    Returns:
        A confirmation message for the UI.

    Raises:
        Any exception raised by ``pd.read_csv`` when the upload cannot be
        parsed, or by ``shutil.copy`` when the copy fails.
    """
    destination_directory = "current_ecg"
    source_file_path = getattr(file, "name", file)

    # Validation only: the parsed frame is not needed here.
    pd.read_csv(source_file_path, header=None)

    # First run: the folder may not exist yet.
    os.makedirs(destination_directory, exist_ok=True)

    stale_files = os.listdir(destination_directory)
    if stale_files:
        try:
            for filename in stale_files:
                file_path = os.path.join(destination_directory, filename)
                if os.path.isfile(file_path):
                    os.remove(file_path)
            print("I file nella cartella 'current_ecg' sono stati eliminati.")
        except OSError as e:
            # Best effort: a failed cleanup is reported but does not block the upload.
            print(f"Errore nell'eliminazione dei file: {str(e)}")

    destination_file_path = os.path.join(
        destination_directory, os.path.basename(source_file_path)
    )
    shutil.copy(source_file_path, destination_file_path)
    return "Your ECG is ready, you can analyze it!"
def ecg_availability(patient_name):
    """Tell whether a patient has an ECG recorded after the last analysed one.

    The patient folder ``PATIENT/<patient_name>`` is expected to hold a
    ``status.json`` file with an optional ``last_datetime`` entry, plus CSV
    recordings named after their timestamp (``%B_%d_%H_%M_%S``).

    Args:
        patient_name: name of the patient's folder under ``PATIENT``.

    Returns:
        ``None`` when no patient is selected or no ``status.json`` exists;
        otherwise a message saying whether a recording newer than
        ``last_datetime`` is present.

    Raises:
        ValueError: if ``last_datetime`` does not match the timestamp format.
    """
    timestamp_format = "%B_%d_%H_%M_%S"

    if not patient_name:
        return None

    folder_path = os.path.join("PATIENT", patient_name)
    status_file_path = os.path.join(folder_path, "status.json")
    if not os.path.isfile(status_file_path):
        return None

    with open(status_file_path, "r") as status_file:
        status_data = json.load(status_file)

    last_datetime_str = status_data.get("last_datetime")
    if last_datetime_str is None:
        return "New ECG available"

    # The format carries no year, so comparisons are only meaningful within one year.
    last_datetime = datetime.strptime(last_datetime_str, timestamp_format)

    for csv_file in os.listdir(folder_path):
        if not csv_file.endswith(".csv"):
            continue
        try:
            csv_datetime = datetime.strptime(csv_file.split(".")[0], timestamp_format)
        except ValueError:
            # Not a timestamp-named recording: ignore it instead of failing.
            continue
        if csv_datetime > last_datetime:
            return f"New ECG available (last ECG: {last_datetime})"

    return f"No ECG available (last ECG: {last_datetime})"
# Number of samples per ECG beat; the uploaded CSV stores them in columns "0".."139".
_ECG_SAMPLES = 140

# Category levels, in the order used both for the one-hot column names fed to the
# decision tree and for the integer positions on the probability map.
_TREE_FEATURE_LEVELS = {
    "ST_Slope": ("Up", "Flat", "Down"),
    "ChestPainType": ("ASY", "ATA", "NAP", "TA"),
}


def _latent_space_figure(latent_vector):
    """Plot the reference ECGs in the 2-D PCA space and mark the given latent vector."""
    # NOTE(review): the colour-map keys (0/1) do not match the values of the
    # "target" column ("normal"/"heart disease"), and the `labels` key does not
    # match the column name either, so plotly defaults apply. Left unchanged to
    # keep the current appearance - confirm the intended colours.
    fig = px.scatter(
        df_compressed,
        x='Feature1',
        y='Feature2',
        color='target',
        color_discrete_map={0: 'red', 1: 'blue'},
        labels={'Target': 'Binary Target'},
        size_max=18,
    )
    fig.update_layout(xaxis_title='component 1', yaxis_title='component 2')

    patient_xy = pca.transform(latent_vector.reshape(1, -1))
    fig.add_trace(
        go.Scatter(
            x=[patient_xy[0][0]],
            y=[patient_xy[0][1]],
            mode='markers',
            marker=dict(symbol='star', color='black', size=15),
            name='actual patient',
        )
    )

    # Rebuild the figure with plain "scatter" traces: plotly express may emit
    # "scattergl" traces for large frames (the original converted the first one only).
    fig_dict = fig.to_dict()
    for trace in fig_dict["data"]:
        if trace.get("type") == "scattergl":
            trace["type"] = "scatter"
    return go.Figure(fig_dict)


def _reconstruction_figure(input_beat, reconstructed_beat):
    """Overlay an input beat and its autoencoder reconstruction, shading the error."""
    samples = np.arange(_ECG_SAMPLES)
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(x=samples, y=input_beat, fill=None, mode='lines', name='Input',
                   line=dict(color='black', width=3)))
    fig.add_trace(
        go.Scatter(x=samples, y=reconstructed_beat, fill=None, mode='lines',
                   name='Reconstruction', line=dict(color='red', width=3)))

    # Closed polygon: reconstruction left-to-right, then input right-to-left.
    # NOTE(review): with a closed polygon `fill='toself'` is the usual choice;
    # 'tozeroy' and the alpha of 10.0 are kept as they were - confirm intent.
    fill_x = list(samples) + list(samples[::-1])
    fill_y = list(reconstructed_beat) + list(input_beat[::-1])
    fig.add_trace(
        go.Scatter(x=fill_x, y=fill_y, fill='tozeroy', fillcolor='rgba(255, 182, 193, 10.0)',
                   mode='lines', line=dict(color='rgba(255, 182, 193, 0.5)', width=0),
                   name='Error'))
    fig.update_layout(legend=dict(x=1.1, y=1.05))
    return fig


def _one_hot_tree_features(df_tree):
    """One-hot encode ``ST_Slope`` and ``ChestPainType`` into the decision tree's columns."""
    # Vectorised comparisons instead of `X['col'][k] = 1`: that chained assignment
    # does not write into the frame once pandas Copy-on-Write is active.
    return pd.DataFrame(
        {
            f"{column}_{level}": (df_tree[column] == level).astype(int)
            for column, levels in _TREE_FEATURE_LEVELS.items()
            for level in levels
        }
    )


def _decision_tree_figure(st_slope, chest_pain_type, y_score):
    """Draw the pre-computed probability map and mark the patient's categories on it."""
    # Global seaborn/matplotlib styling, applied before the figure is created.
    sns.set(font_scale=2)
    sns.set_style("white")

    # NOTE(review): the pyplot figure is handed to the UI and never closed here,
    # so figures accumulate in a long-running process - consider closing it
    # once rendered.
    fig_tree = plt.figure()
    x1 = np.linspace(df_plot['ST_Slope'].min() - 0.5, df_plot['ST_Slope'].max() + 0.5)
    x2 = np.linspace(df_plot['ChestPainType'].min() - 0.5, df_plot['ChestPainType'].max() + 0.5)
    X1, X2 = np.meshgrid(x1, x2)
    points = df_plot[['ST_Slope', 'ChestPainType']].values
    values = df_plot['Prob'].values
    Z = griddata(points, values, (X1, X2), method='nearest')
    plt.contourf(X1, X2, Z, cmap='coolwarm', levels=10)
    plt.colorbar(label='Predicted Probability')

    # The marker is drawn only when both categories are known levels.
    slope_levels = _TREE_FEATURE_LEVELS["ST_Slope"]
    pain_levels = _TREE_FEATURE_LEVELS["ChestPainType"]
    if st_slope in slope_levels and chest_pain_type in pain_levels:
        marker_x = [slope_levels.index(st_slope)]
        marker_y = [pain_levels.index(chest_pain_type)]
    else:
        marker_x, marker_y = [], []
    # `cmap` dropped: it is ignored when `c` is a plain colour.
    plt.scatter(marker_x, marker_y, c="k", edgecolor='k', marker='o',
                label=f'prob={y_score[:1].round(3)}')

    # Hand-placed category names replace the numeric ticks.
    plt.xticks([])
    plt.yticks([])
    plt.text(0.0, -0.7, "Up", ha='center', fontsize=15)
    plt.text(1.00, -0.7, "Flat", ha='center', fontsize=15)
    plt.text(2.00, -0.7, "Down", ha='center', fontsize=15)
    plt.text(-0.62, 0.0, "ASY", rotation='vertical', va='center', fontsize=15)
    plt.text(-0.62, 1.00, "ATA", rotation='vertical', va='center', fontsize=15)
    plt.text(-0.62, 2.0, "NAP", rotation='vertical', va='center', fontsize=15)
    plt.text(-0.62, 3.0, "TA", rotation='vertical', va='center', fontsize=15)
    plt.xlabel('ST_Slope', fontsize=15, labelpad=20)
    plt.ylabel('ChestPainType', fontsize=15, labelpad=20)
    return fig_tree


def _llm_cluster_figure(df_llm):
    """Plot the reference text-embedding clusters and the uploaded patient rows."""
    df_llm_encoding = df_encoding(df_llm)
    df_point_LLM = LLM_transform(df_llm_encoding)
    df_point_LLM.columns = [str(column) for column in df_point_LLM.columns]
    # PCA.transform returns a plain array by default, which has no `.columns`
    # and cannot be indexed by name: wrap it explicitly.
    pca_llm_point = pd.DataFrame(
        np.asarray(pca_2d_llm_clusters.transform(df_point_LLM)),
        columns=["comp1", "comp2"],
    )

    fig = go.Figure()
    for cluster in df_pca_llm['cluster'].unique():
        cluster_data = df_pca_llm[df_pca_llm['cluster'] == cluster]
        fig.add_trace(
            go.Scatter(x=cluster_data['comp1'], y=cluster_data['comp2'], mode='markers',
                       name=f'Cluster {cluster}'))
    fig.update_traces(marker=dict(size=12))
    fig.update_xaxes(title_text="Principal Component 1")
    fig.update_yaxes(title_text="Principal Component 2")
    fig.add_trace(
        go.Scatter(x=pca_llm_point['comp1'], y=pca_llm_point['comp2'], mode='markers',
                   name='Patient',
                   marker=dict(size=12, symbol='diamond', line=dict(width=2, color='Black'))))
    fig.update_layout(legend=dict(x=1.05, y=1))
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)
    return fig


def ecg_analysis():
    """Analyse the CSV stored in ``current_ecg`` and build every output of the demo.

    The first file listed in ``current_ecg`` is read; its columns "0".."139" are
    scaled with the module-level ``min_val``/``max_val`` and passed through the
    autoencoder and the classifier. ``ChestPainType``/``ST_Slope`` feed the
    decision tree, and the clinical columns feed the text-embedding projection.
    Only the first row is used for the ECG plots, the tree marker and both
    probabilities.

    Returns:
        A 6-tuple: latent-space figure, reconstruction figure, classifier
        probability text, decision-tree figure, decision-tree probability text,
        text-embedding cluster figure.

    Raises:
        FileNotFoundError: if ``current_ecg`` is missing or contains no file.
        KeyError: if the CSV lacks one of the expected columns.
    """
    uploaded = os.listdir("current_ecg")
    if not uploaded:
        raise FileNotFoundError("No file in 'current_ecg': upload an ECG before analysing it.")
    df = pd.read_csv(os.path.join("current_ecg", uploaded[0]))

    # --- ECG signal: scale exactly like the reference dataset ---------------
    df_data = df[[str(i) for i in range(_ECG_SAMPLES)]].values
    df_data = (df_data - min_val) / (max_val - min_val)
    df_data = tf.cast(df_data, tf.float32)

    heartbeat_encoder_preds = autoencoder.encoder(df_data).numpy()
    heartbeat_decoder_preds = autoencoder.decoder(heartbeat_encoder_preds).numpy()
    classification_res = classifier.predict(df_data)
    print("shapes of: encoder preds, decoder preds, classification preds\n",
          heartbeat_encoder_preds.shape, heartbeat_decoder_preds.shape, classification_res.shape)

    fig = _latent_space_figure(heartbeat_encoder_preds[0, :])
    fig_reconstruction = _reconstruction_figure(df_data[0].numpy(), heartbeat_decoder_preds[0])

    # --- Decision tree on ChestPainType / ST_Slope ---------------------------
    df_tree = df[["ChestPainType", "ST_Slope"]].copy()
    X_plot = _one_hot_tree_features(df_tree)
    y_score = decision_tree.predict_proba(X_plot)[:, 1]
    first_patient = df_tree.iloc[0]
    fig_tree = _decision_tree_figure(
        first_patient["ST_Slope"], first_patient["ChestPainType"], y_score
    )

    # --- Text-embedding clusters ---------------------------------------------
    df_llm = df[["Age", "Sex", "ChestPainType", "RestingBP", "Cholesterol", "FastingBS",
                 "RestingECG", "MaxHR", "ExerciseAngina", "Oldpeak", "ST_Slope"]].copy()
    fig_llm_cluster = _llm_cluster_figure(df_llm)

    p_class_res = classification_res[0, :]
    return (
        fig,
        fig_reconstruction,
        f"Heart disease probability: {int(p_class_res[0]*100)} %",
        fig_tree,
        f"Heart disease probability: {int(y_score[0]*100)} %",
        fig_llm_cluster,
    )
# Gradio UI: patient selection, ECG upload, and one button that runs ecg_analysis()
# and fills every plot/textbox below.
# NOTE(review): the nesting of the rows/columns is reconstructed from the statement
# order - confirm it matches the intended layout.
with gr.Blocks(title="TIQUE - AI DEMO CAPABILITIES") as demo:
    # Header: closing tags fixed (they were written as opening tags).
    gr.Markdown("<h1><center>TIQUE: AI DEMO CAPABILITIES</center></h1>")

    # Patient selection and availability of a new recording.
    with gr.Row():
        pazienti = ["Elisabeth Smith", "Michael Mims"]
        menu_pazienti = gr.Dropdown(choices=pazienti, label="patients")
        available_ecg_result = gr.Textbox()
        menu_pazienti.input(ecg_availability, inputs=[menu_pazienti], outputs=[available_ecg_result])

    # Upload of the patient's CSV.
    with gr.Row():
        input_file = gr.UploadButton("Upload patient's data and latest ECG π")
        text_upload_results = gr.Textbox()
        input_file.upload(upload_ecg, inputs=[input_file], outputs=text_upload_results)

    with gr.Row():
        ecg_start_analysis_button = gr.Button(value="Start data analysis", scale=1)

    gr.Markdown("## Patient positioning on clusters")
    with gr.Row():
        llm_cluster = gr.Plot()

    gr.Markdown("## ECG analysis:")
    with gr.Row():
        with gr.Column():
            latent_space_representation = gr.Plot()
        with gr.Column():
            autoencoder_ecg_reconstruction = gr.Plot()
            classifier_nn_prediction = gr.Textbox()

    gr.Markdown("## Patient's classification based on Chest Pain Type and ST Slope:")
    with gr.Row():
        decision_tree_plot = gr.Plot()
        decision_tree_proba = gr.Textbox()

    # The output order must match the tuple returned by ecg_analysis().
    ecg_start_analysis_button.click(
        fn=ecg_analysis,
        inputs=None,
        outputs=[
            latent_space_representation,
            autoencoder_ecg_reconstruction,
            classifier_nn_prediction,
            decision_tree_plot,
            decision_tree_proba,
            llm_cluster,
        ],
    )

if __name__ == "__main__":
    demo.launch()