import streamlit as st import pandas as pd import numpy as np import matplotlib.pyplot as plt from sklearn.ensemble import RandomForestClassifier, VotingClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.linear_model import LogisticRegression from sklearn.svm import SVC from sklearn.naive_bayes import GaussianNB from sklearn.neural_network import MLPClassifier from sklearn.ensemble import GradientBoostingClassifier from xgboost import XGBClassifier from lightgbm import LGBMClassifier from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score st.title('Kidney Disease Prediction Application') st.write(''' Please fill in the attributes below, then hit the Predict button to get your results. ''') st.header('Input Attributes') age = st.slider('Your Age (Years)', min_value=0.0, max_value=100.0, value=50.0, step=1.0) st.write(''' ''') bp = st.slider('Blood Pressure (mm/Hg)', min_value=0.0, max_value=200.0, value=150.0, step=1.0) st.write(''' ''') s = st.radio("Specific Gravity (SG)", ('SG 1.005: Very Low Urnine Concentration', 'SG 1.010: Moderately Low Urnine Concentration', 'SG 1.015: Normal', 'SG 1.020: Slightly High Urine Concentration','SG 1.025: High Urine Concentration')) st.write(''' ''') # Specific Gravity if s == "SG 1.005: Very Low Urnine Concentration": sg = 1.005 elif s == "SG 1.010: Moderately Low Urnine Concentration": sg = 1.010 elif s == "SG 1.015: Normal": sg = 1.015 elif s == "SG 1.020: Slightly High Urine Concentration": sg = 1.020 else: sg = 1.025 a = st.radio("Albumin Level (g/L)", ('Low (less then 33.9)', 'Slightly Low (33.9-35)', 'Normal (35 – 50 g/L)', 'Slightly High (50 - 51.5)', 'High (51.5 - 150)' , 'Extremely High (Over 150)')) st.write(''' ''') # Specific Gravity if a == "Low (less then 33.9)": al = 0 elif a == "Slightly Low (33.9-35)": al = 1 elif a == "Normal (35 – 50 g/L)": al = 2 elif a == "Slightly High (50 - 51.5)": al = 3 elif a == "High (51.5 - 100)": al = 4 else: al = 5 sug = st.radio("Sugar Level", ('Low', 'Slightly Low', 'Normal', 'Slightly High', 'High' , 'Extremely High')) st.write(''' ''') # Specific Gravity if sug == "Low)": sugar = 0 elif sug == "Slightly Low": sugar = 1 elif sug == "Normal": sugar = 2 elif sug == "Slightly High": sugar = 3 elif sug == "High": sugar = 4 else: sugar = 5 red = st.radio("Red Blood Cell Count", ('Normal', 'Abnormal')) st.write(''' ''') # blood cell if red == "Normal": rbc = 0 else: rbc = 1 pus = st.radio("Pus Cell Count", ('Normal', 'Abnormal')) st.write(''' ''') # pus cell if pus == "Normal": pc = 0 else: pc = 1 pusc = st.radio("Pus Cell Clumps", ('Present', 'Not Present')) st.write(''' ''') # pus cell if pusc == "Present": pcc = 1 else: pcc = 0 ba = st.radio("Bacterial Infection", ('Present', 'Not Present')) st.write(''' ''') # pus cell if ba == "Present": bac = 1 else: bac = 0 bgr = st.slider('Blood Glucose Random (mgs/dl)', min_value=0.0, max_value=600.0, value=300.0, step=1.0) st.write(''' ''') bu = st.slider('Blood Urea (mgs/dl)', min_value=0.0, max_value=500.0, value=250.0, step=0.1) st.write(''' ''') sc = st.slider('Serum Creatinine (mgs/dl)', min_value=0.0, max_value=100.0, value=50.0, step=0.1) st.write(''' ''') sod = st.slider('Sodium (mEq/L)', min_value=0.0, max_value=200.0, value=100.0, step=0.1) st.write(''' ''') pot = st.slider('Potassium (mEq/L)', min_value=0.0, max_value=100.0, value=50.0, step=0.1) st.write(''' ''') hemo = st.slider('Hemoglobin (gms)', min_value=0.0, max_value=20.0, value=10.0, step=0.1) st.write(''' ''') pcv = st.slider('Packed Cell Volume', min_value=0.0, max_value=100.0, value=50.0, step=0.1) st.write(''' ''') wbc = st.slider('White Blood Cell Count (cells/cumm)', min_value=0.0, max_value=50000.0, value=25000.0, step=1.0) st.write(''' ''') rbcc = st.slider('Red Blood Cell Count (millions/cmm)', min_value=0.0, max_value=200.0, value=100.0, step=1.0) st.write(''' ''') hyp = st.radio("Hypertension", ('Yes', 'No')) st.write(''' ''') if hyp == "Yes": htn = 1 else: htn = 0 diam = st.radio("Diabetes Mellitus", ('Yes', 'No')) st.write(''' ''') if diam == "Yes": dm = 1 else: dm = 0 cor = st.radio("Coronary Artery Disease", ('Yes', 'No')) st.write(''' ''') if cor == "Yes": cad = 1 else: cad = 0 app = st.radio("Appetite", ('Good', 'Poor')) st.write(''' ''') if app == "Good": appet = 1 else: appet = 0 pedal = st.radio("Pedal Edema", ('Yes', 'No')) st.write(''' ''') if pedal == "Yes": pe = 1 else: pe = 0 anemia = st.radio("Anemia", ('Yes', 'No')) st.write(''' ''') if anemia == "Yes": ane = 1 else: ane = 0 selected_models = st.multiselect("Choose Classifier Models", ('Random Forest', 'Naïve Bayes', 'Logistic Regression', 'Decision Tree', 'XGBoost')) st.write(''' ''') # Initialize an empty list to store the selected models models_to_run = [] # Check which models were selected and add them to the models_to_run list if 'Random Forest' in selected_models: models_to_run.append(RandomForestClassifier()) if 'Naïve Bayes' in selected_models: models_to_run.append(GaussianNB()) if 'Logistic Regression' in selected_models: models_to_run.append(LogisticRegression()) if 'Decision Tree' in selected_models: models_to_run.append(DecisionTreeClassifier()) if 'Gradient Boosting' in selected_models: models_to_run.append(GradientBoostingClassifier()) if 'Support Vector Machine' in selected_models: models_to_run.append(SVC()) if 'LightGBM' in selected_models: models_to_run.append(LGBMClassifier()) if 'XGBoost' in selected_models: models_to_run.append(XGBClassifier()) user_input = np.array([age, bp, sg, al, sugar, rbc, pc, pcc, bac, bgr, bu, sc, sod, pot, hemo, pcv, wbc, rbcc, htn, dm, cad, appet, pe, ane]).reshape(1, -1) # import dataset def get_dataset(): data = pd.read_csv('kidney.csv') # Calculate the correlation matrix # corr_matrix = data.corr() # Create a heatmap of the correlation matrix # plt.figure(figsize=(10, 8)) # sns.heatmap(corr_matrix, annot=True, cmap='coolwarm') # plt.title('Correlation Matrix') # plt.xticks(rotation=45) # plt.yticks(rotation=0) # plt.tight_layout() # Display the heatmap in Streamlit # st.pyplot() return data def generate_model_labels(model_names): model_labels = [] for name in model_names: words = name.split() if len(words) > 1: # Multiple words, use initials label = "".join(word[0] for word in words) else: # Single word, take the first 3 letters label = name[:3] model_labels.append(label) return model_labels if st.button('Submit'): df = get_dataset() # fix column names df.columns = (["id", "age", "bp", "sg", "al", "su", "rbc", "pc", "pcc", "ba", "bgr", "bu", "sc", "sod", "pot", "hemo", "pcv", "wc", "rc", "htn", "dm", "cad", "appet", "pe", "ane", "class"]) # Transforming classification into numerical format df['class'] = df['class'].apply(lambda x: 1 if x == 'ckd' else 0) # Transforming ane into numerical format df['ane'] = df['ane'].apply(lambda x: 1 if x == 'yes' else 0) # Transforming pe into numerical format df['pe'] = df['pe'].apply(lambda x: 1 if x == 'yes' else 0) # Transforming appet into numerical format df['appet'] = df['appet'].apply(lambda x: 1 if x == 'poor' else 0) # Transforming cad into numerical format df['cad'] = df['cad'].apply(lambda x: 1 if x == 'yes' else 0) # Transforming dm into numerical format df['dm'] = df['dm'].apply(lambda x: 1 if x == 'yes' else 0) # Transforming htn into numerical format df['htn'] = df['htn'].apply(lambda x: 1 if x == 'yes' else 0) # Transforming ba into numerical format df['ba'] = df['ba'].apply(lambda x: 1 if x == 'present' else 0) # Transforming pcc into numerical format df['pcc'] = df['pcc'].apply(lambda x: 1 if x == 'present' else 0) # Transforming pc into numerical format df['pc'] = df['pc'].apply(lambda x: 1 if x == 'abnormal' else 0) # Transforming rbc into numerical format df['rbc'] = df['rbc'].apply(lambda x: 1 if x == 'abnormal' else 0) # Replace NaN values with median for float columns float_columns = df.select_dtypes(include=['float']).columns df[float_columns] = df[float_columns].fillna(df[float_columns].median()) # Convert columns to numeric numeric_columns = ['pcv', 'wc', 'rc'] df[numeric_columns] = df[numeric_columns].apply(pd.to_numeric, errors='coerce') # Replace NaN values with median for numeric columns df[numeric_columns] = df[numeric_columns].fillna(df[numeric_columns].median()) # Split the dataset into train and test X = df.drop(['class','id'], axis=1) y = df['class'] X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) # Create two columns to divide the screen left_column, right_column = st.columns(2) # Left column content with left_column: # Create a VotingClassifier with the top 3 models ensemble = VotingClassifier( estimators=[('rf', RandomForestClassifier()), ('xgb', XGBClassifier()), ('gb', GradientBoostingClassifier())], voting='hard') # Fit the voting classifier to the training data ensemble.fit(X_train, y_train) # Make predictions on the test set model_predictions = ensemble.predict(user_input) # Evaluate the model's performance on the test set ensamble_accuracy = accuracy_score(y_test, ensemble.predict(X_test)) ensamble_precision = precision_score(y_test, ensemble.predict(X_test)) ensamble_recall = recall_score(y_test, ensemble.predict(X_test)) ensamble_f1score = f1_score(y_test, ensemble.predict(X_test)) if model_predictions == 1: st.write(f'According to Ensemble Model You have a **Very High Chance (1)** of Kidney Disease.') else: st.write(f'According to Ensemble Model You have a **Very Low Chance (0)** of Kidney Disease.') st.write('Ensemble Model Accuracy:', ensamble_accuracy) st.write('Ensemble Model Precision:', ensamble_precision) st.write('Ensemble Model Recall:', ensamble_recall) st.write('Ensemble Model F1 Score:', ensamble_f1score) st.write('------------------------------------------------------------------------------------------------------') # Right column content with right_column: for model in models_to_run: # Train the selected model model.fit(X_train, y_train) # Make predictions on the test set model_predictions = model.predict(user_input) # Evaluate the model's performance on the test set model_accuracy = accuracy_score(y_test, model.predict(X_test)) model_precision = precision_score(y_test, model.predict(X_test)) model_recall = recall_score(y_test, model.predict(X_test)) model_f1score = f1_score(y_test, model.predict(X_test)) if model_predictions == 1: st.write(f'According to {type(model).__name__} Model You have a **Very High Chance (1)** of Kidney Disease.') else: st.write(f'According to {type(model).__name__} Model You have a **Very Low Chance (0)** of Kidney Disease.') st.write(f'{type(model).__name__} Accuracy:', model_accuracy) st.write(f'{type(model).__name__} Precision:', model_precision) st.write(f'{type(model).__name__} Recall:', model_recall) st.write(f'{type(model).__name__} F1 Score:', model_f1score) st.write('------------------------------------------------------------------------------------------------------') # Initialize lists to store model names and their respective performance metrics model_names = ['Ensemble'] accuracies = [ensamble_accuracy] precisions = [ensamble_precision] recalls = [ensamble_recall] f1_scores = [ensamble_f1score] # Loop through the selected models to compute their performance metrics for model in models_to_run: model_names.append(type(model).__name__) model.fit(X_train, y_train) model_predictions = model.predict(X_test) accuracies.append(accuracy_score(y_test, model_predictions)) precisions.append(precision_score(y_test, model_predictions)) recalls.append(recall_score(y_test, model_predictions)) f1_scores.append(f1_score(y_test, model_predictions)) # Create a DataFrame to store the performance metrics metrics_df = pd.DataFrame({ 'Model': model_names, 'Accuracy': accuracies, 'Precision': precisions, 'Recall': recalls, 'F1 Score': f1_scores }) # Get the model labels model_labels = generate_model_labels(metrics_df['Model']) # Plot the comparison graphs plt.figure(figsize=(12, 10)) # Accuracy comparison plt.subplot(2, 2, 1) plt.bar(model_labels, metrics_df['Accuracy'], color='skyblue') plt.title('Accuracy Comparison') plt.ylim(0, 1) # Precision comparison plt.subplot(2, 2, 2) plt.bar(model_labels, metrics_df['Precision'], color='orange') plt.title('Precision Comparison') plt.ylim(0, 1) # Recall comparison plt.subplot(2, 2, 3) plt.bar(model_labels, metrics_df['Recall'], color='green') plt.title('Recall Comparison') plt.ylim(0, 1) # F1 Score comparison plt.subplot(2, 2, 4) plt.bar(model_labels, metrics_df['F1 Score'], color='purple') plt.title('F1 Score Comparison') plt.ylim(0, 1) # Adjust layout to prevent overlapping of titles plt.tight_layout() # Display the graphs in Streamlit st.pyplot()