Spaces:

Akankshg
/

Healthcare-PHM

Runtime error

File size: 47,895 Bytes

# pip install streamlit wordcloud
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.figure_factory as ff
import warnings
warnings.filterwarnings("ignore")
from wordcloud import WordCloud
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.preprocessing import LabelEncoder
from pandasai import SmartDataframe
from pandasai.llm.google_gemini import GoogleGemini
import warnings
from pandasai.responses.response_parser import ResponseParser
# pip install wordcloud
# !pip install kmodes

from sklearn.decomposition import PCA
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from kmodes.kprototypes import KPrototypes
import plotly.graph_objects as go
import streamlit as st
#pip install google-generativeai


import os
from huggingface_hub import hf_hub_download

# Hugging Face dataset repository and the parquet file inside it that is downloaded below.
repo_id = "Akankshg/ML_DATA"
filename = "EDA_DATA.parquet"

# Access the token
# NOTE: indexing os.environ raises KeyError at import time when
# HUGGING_FACE_HUB_TOKEN is not set in the environment.
token = os.environ["HUGGING_FACE_HUB_TOKEN"]

# Download the file
# The value returned here is what fetch_data() later passes to pd.read_parquet.
local_file = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="dataset",token=token)



class StreamlitResponse(ResponseParser):
    """Response parser that shows results through Streamlit widgets."""

    def __init__(self, context) -> None:
        # No additional state: construction is delegated to ResponseParser.
        super().__init__(context)

    def format_dataframe(self, result):
        """Display ``result["value"]`` with ``st.dataframe``; returns None."""
        st.dataframe(result["value"])

    def format_plot(self, result):
        """Display ``result["value"]`` with ``st.image``; returns None."""
        st.image(result["value"])


# Page-wide Streamlit configuration: browser-tab title/icon and wide layout.
st.set_page_config(page_title="Healthcare Data Analysis", page_icon=":bar_chart:", layout="wide")
st.title(':bar_chart: Healthcare Data Analysis Dashboard')
# Inject CSS that sets the top padding of the main block container to 1rem.
st.markdown('<style>div.block-container{padding-top:1rem;}</style>',unsafe_allow_html=True)

# Sidebar 1
st.sidebar.title('Dashboard Options')
# The chosen label is stored in analysis_option; the code that consumes it is
# not part of this section of the file.
analysis_option = st.sidebar.selectbox('Select Analysis', ['Data','EDA', 'Machine Learning','Health Care Chat Bot AI'])

## Loading data
@st.cache_data()
def fetch_data():
    """Read the downloaded parquet file into a DataFrame.

    The function is wrapped in ``st.cache_data``; it reads the module-level
    ``local_file`` path and takes no arguments.
    """
    return pd.read_parquet(local_file)


# Module-level dataset used by the rest of the script.
data = fetch_data()

def funnel_chart(df):
    """Build a funnel chart of encounter counts per weekday, split by legal sex.

    Rows are counted per (weekday, ``LegalSex``) pair. Only the 'Male' and
    'Female' columns of that count table are plotted, male first. When
    neither value occurs in the data an empty chart is returned instead of
    failing on a missing column.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs the columns ``PatientID``, ``EncounterDate`` and ``LegalSex``.
        ``EncounterDate`` must support the ``.dt`` accessor because weekday
        names are derived with ``.dt.day_name()``.

    Returns
    -------
    plotly.graph_objects.Figure
        Funnel figure with one stage per weekday, Monday through Sunday.
    """
    weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
    gender_colors = {'Male': '#2986cc', 'Female': '#c90076'}

    visits = df[['PatientID', 'EncounterDate', 'LegalSex']].copy()
    visits['WeekDay'] = visits['EncounterDate'].dt.day_name().astype('string')
    counts = visits.groupby(['WeekDay', 'LegalSex']).size().unstack(fill_value=0)

    # Male is listed first so the trace order matches the two-gender layout.
    present = [gender for gender in ('Male', 'Female') if gender in counts.columns]

    if present:
        # Weekdays without any encounter become NaN rows after the reindex.
        counts = counts.reindex(weekday_order)
        per_gender = [
            pd.DataFrame({
                'number': counts[gender].to_numpy(),
                'stage': weekday_order,
                'Gender': gender,
            })
            for gender in present
        ]
        chart_data = pd.concat(per_gender, axis=0)
    else:
        # Nothing to plot: keep the expected columns so px.funnel still works.
        chart_data = pd.DataFrame({'number': [], 'stage': [], 'Gender': []})

    fig2 = px.funnel(
        chart_data,
        x='number',
        y='stage',
        color='Gender',
        color_discrete_map=gender_colors,
        title='Patient Visits by Gender and Weekday',
    )
    fig2.update_layout(
        template="plotly_dark",
        xaxis_title='Number of Patients',
        yaxis_title='Weekday',
        height=500, width=250
    )
    return fig2

def scatter_man(data):
    """Bubble chart of diagnosis description versus age.

    Rows lacking ``GroupedICD`` or ``Age`` are discarded, the remainder is
    counted per (Age, GroupedICD, Description) combination, and only
    combinations with more than 1000 rows are plotted. Bubble size and colour
    both encode the count.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs ``PatientID``, ``GroupedICD``, ``Description`` and ``Age``.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    import plotly.express as px

    diagnoses = data[['PatientID', 'GroupedICD', 'Description', 'Age']].copy()
    usable = diagnoses['GroupedICD'].notna() & diagnoses['Age'].notna()
    diagnoses = diagnoses[usable]

    icd_counts = (
        diagnoses
        .groupby(['Age', 'GroupedICD', 'Description'])
        .size()
        .reset_index(name='Count')
    )
    frequent = icd_counts[icd_counts['Count'] > 1000]

    fig5 = px.scatter(
        frequent,
        y='Age',
        x='Description',
        size='Count',
        hover_name='Age',
        color='Count',
        title='Age - ICD Relationship',
        color_continuous_scale='ylorrd',
    )
    fig5.update_layout(
        template="plotly_dark",
        xaxis_title='ICD Code',
        yaxis_title='Age',
        coloraxis_colorbar=dict(title='Count'),
        height=950,
        width=1400,
    )
    return fig5


def barplot_lab(df):
    """Bar chart of the 20 most frequent (lab component, description) pairs.

    Only rows whose encounter falls within 7 days of the same patient's
    previous encounter are counted, and both ``ComponentName`` and
    ``GroupedICD`` must be present.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``PatientID``, ``EncounterDate``, ``ComponentName``,
        ``GroupedICD`` and ``Description``.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    columns = ['PatientID', 'EncounterDate', 'ComponentName', 'GroupedICD', 'Description']
    visits = df[columns].copy()
    visits = visits.sort_values(by='EncounterDate', ascending=True)

    # Gap in days to the previous encounter of the same patient (NaN for the first one).
    visits['DaysSinceLastVisit'] = visits.groupby('PatientID')['EncounterDate'].diff().dt.days
    follow_ups = visits[visits['DaysSinceLastVisit'] <= 7]

    has_lab_and_icd = follow_ups['ComponentName'].notna() & follow_ups['GroupedICD'].notna()
    lab_rows = follow_ups[has_lab_and_icd]

    pair_counts = lab_rows.groupby(['ComponentName', 'Description']).size().reset_index(name='Count')
    top_pairs = pair_counts.sort_values(by='Count', ascending=False).head(20)

    fig3 = px.bar(
        top_pairs,
        x='ComponentName',
        y='Count',
        hover_data=['ComponentName', 'Count'],
        color='ComponentName',
        height=450,
        title='Lab Test',
    )
    fig3.update_xaxes(tickangle=45)
    return fig3

def scatterplot(df, min_count=2000):
    """Bubble chart relating lab components to diagnosis descriptions.

    Only rows whose encounter falls within 7 days of the same patient's
    previous encounter are counted, and both ``ComponentName`` and
    ``GroupedICD`` must be present. Pairs are plotted when their row count
    exceeds ``min_count``.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``PatientID``, ``EncounterDate``, ``ComponentName``,
        ``GroupedICD`` and ``Description``.
    min_count : int, optional
        Strict lower bound on the pair count; defaults to 2000.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    df = df[['PatientID', 'EncounterDate', 'ComponentName', 'GroupedICD', 'Description']].copy()
    df.sort_values(by=['EncounterDate'], ascending=True, inplace=True)

    # Gap in days to the previous encounter of the same patient (NaN for the first one).
    df['DaysSinceLastVisit'] = df.groupby('PatientID')['EncounterDate'].diff().dt.days
    df = df[df['DaysSinceLastVisit'] <= 7]

    lab = df[df['ComponentName'].notna() & df['GroupedICD'].notna()]
    component = lab.groupby(['ComponentName', 'Description']).size().reset_index(name='Count')
    component = component[component['Count'] > min_count]

    fig = px.scatter(component, y='ComponentName', x='Description', size='Count',
                     hover_name='ComponentName', color='Count', title='Lab Component-ICD Relationship')
    fig.update_layout(template="plotly_dark", xaxis_title='ICD Code', yaxis_title='Component Name',
                      coloraxis_colorbar=dict(title='Count'),
                      height=550, width=500)
    return fig

####################################### EDA ##################################################################
def histplot_6(data):
    """Histogram of ``Age`` coloured by ``LegalSex``.

    Rows missing either column are dropped before plotting.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``Age`` and ``LegalSex``.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    demographics = data[['Age', 'LegalSex']].dropna(subset=['Age', 'LegalSex'])

    fig = px.histogram(
        demographics,
        x='Age',
        color='LegalSex',
        nbins=10,
        opacity=0.5,
        title='Age Distribution by Legal Sex',
        color_discrete_sequence=px.colors.qualitative.Pastel,
    )

    # White fonts for the title, axis titles and tick labels.
    white_tick = dict(size=14, color='white')
    white_axis_title = dict(size=16, color='white')
    fig.update_layout(
        title_font=dict(size=20, color='white'),
        xaxis_title_font=white_axis_title,
        yaxis_title_font=white_axis_title,
        xaxis=dict(tickfont=white_tick),
        yaxis=dict(tickfont=white_tick),
    )
    return fig


def histplot_7(data):
    """Line chart of the number of rows per age, one line per BP severity.

    Rows with a missing severity, or a severity of 'Unknown' or 'BP NORMAL',
    are left out.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``Age`` and ``BP Severity``.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    import plotly.graph_objects as go

    bp = data[['Age', 'BP Severity']].copy()
    severity = bp['BP Severity']
    wanted = severity.notna() & (severity != 'Unknown') & (severity != 'BP NORMAL')
    bp = bp[wanted]

    def _trace_for(level):
        # Count rows per age for this severity level, ordered by age.
        per_age = bp.loc[bp['BP Severity'] == level, 'Age'].value_counts().sort_index()
        return go.Scatter(x=per_age.index, y=per_age.values, mode='lines+markers', name=level)

    fig = go.Figure(data=[_trace_for(level) for level in bp['BP Severity'].unique()])
    fig.update_layout(
        title='Age Distribution by BP Severity',
        xaxis_title='Age',
        yaxis_title='Count',
        title_font=dict(size=20, color='white'),
    )
    return fig


def pie_chart_7(data):
    """Donut chart of the share of rows per depression severity.

    The severities 'None-minimal' and 'Unknown' are excluded.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the column ``Depression Severity``.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    import plotly.graph_objects as go

    severity = data['Depression Severity']
    shown = severity[(severity != 'None-minimal') & (severity != 'Unknown')]
    counts = shown.value_counts()

    slice_colors = ['#FF5733', '#FFC300', '#36A2EB', '#C71585']

    donut = go.Pie(
        labels=counts.index,
        values=counts,
        hole=0.6,  # size of the hole that turns the pie into a donut
        marker=dict(colors=slice_colors),
        textinfo='label+percent',
        textfont=dict(size=10),
        insidetextorientation='radial',
    )

    fig = go.Figure()
    fig.add_trace(donut)
    fig.update_layout(
        title_text="Distribution of Patients by Depression",
        title_font_size=20,
        title_font_color='white',
        autosize=False,
    )
    return fig

def chart_8(data):
    """Grouped box plot of BMI per BP severity, one box series per legal sex.

    Rows missing any of the three columns are dropped, as are the severities
    'Unknown' and 'BP NORMAL'.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``BP Severity``, ``BMI`` and ``LegalSex``.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    import plotly.graph_objects as go

    needed = ['BP Severity', 'BMI', 'LegalSex']
    bmi = data[needed].dropna(subset=needed)
    bmi = bmi[(bmi['BP Severity'] != 'Unknown') & (bmi['BP Severity'] != 'BP NORMAL')]

    fig = go.Figure()
    for gender in bmi['LegalSex'].unique():
        rows = bmi[bmi['LegalSex'] == gender]
        # 'Male' gets blue; every other value gets orange.
        box_color = '#1f77b4' if gender == 'Male' else '#ff7f0e'
        fig.add_trace(
            go.Box(
                y=rows['BMI'],
                x=rows['BP Severity'],
                name=gender,
                boxmean='sd',  # draw mean and standard deviation
                marker_color=box_color,
                text=rows['BP Severity'],  # shown in the hover tooltip
                hoverinfo='y+name+text',
            )
        )

    white_tick = dict(size=14, color='white')
    fig.update_layout(
        title='BMI by BP Severity and Legal Sex',
        title_font=dict(size=20, color='white'),
        xaxis_title='BP Severity',
        yaxis_title='BMI',
        xaxis=dict(tickfont=white_tick),
        yaxis=dict(tickfont=white_tick),
        boxmode='group',  # boxes of different sexes sit side by side
        height=600,
        width=800,
    )
    return fig


def chart_9(data):
    """Heatmap of pairwise correlations between int64/float64 columns.

    ``PatientID`` is excluded from the matrix when it is among the selected
    numeric columns; when it is stored with another dtype (and is therefore
    already filtered out) the drop is skipped instead of raising KeyError.
    Undefined correlations (NaN) are shown as 0.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose int64/float64 columns are correlated.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    import plotly.graph_objects as go

    numeric = data.select_dtypes(include=['int64', 'float64'])
    # errors='ignore': PatientID may not have survived the dtype selection.
    numeric = numeric.drop(columns=['PatientID'], errors='ignore')

    # Correlation matrix; NaN entries are replaced by 0.
    corrmat = numeric.corr().fillna(0)

    fig = go.Figure(data=go.Heatmap(
        z=corrmat.values,
        x=corrmat.columns,
        y=corrmat.columns,
        colorscale='RdYlGn',
        text=corrmat.round(2).values,  # per-cell annotation values
        texttemplate="%{text:.2f}",  # annotation format
        textfont=dict(size=12, color='black')
    ))

    fig.update_layout(
        title='Which Feature is Mainly Involved',
        title_font=dict(size=20, color='white'),
        xaxis_title='Features',
        yaxis_title='Features',
        xaxis=dict(tickfont=dict(size=14, color='white')),
        yaxis=dict(tickfont=dict(size=14, color='white')),
        height=600,
        width=800
    )

    return fig

def chart_10(data):
    """Split violin plot of age per depression severity and legal sex.

    The severities 'None-minimal' and 'Unknown' are excluded, ``Age`` is
    coerced to numeric, and rows missing age, severity or legal sex are
    dropped. 'Female' is drawn on the negative side in blue; every other
    value is drawn on the positive side in orange.

    Parameters
    ----------
    data : pandas.DataFrame
        Needs the columns ``Depression Severity``, ``Age`` and ``LegalSex``.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    import plotly.graph_objects as go

    severity = data['Depression Severity']
    violins = data[(severity != 'None-minimal') & (severity != 'Unknown')].copy()
    violins['Age'] = pd.to_numeric(violins['Age'], errors='coerce')
    violins = violins.dropna(subset=['Age', 'Depression Severity', 'LegalSex'])

    fig = go.Figure()
    for sex in violins['LegalSex'].unique():
        of_sex = violins['LegalSex'] == sex
        is_female = sex == 'Female'
        fig.add_trace(
            go.Violin(
                x=violins['Depression Severity'][of_sex],
                y=violins['Age'][of_sex],
                legendgroup=sex,
                scalegroup=sex,
                name=sex,
                side='negative' if is_female else 'positive',
                line_color='blue' if is_female else 'orange',
            )
        )

    layout_options = dict(
        title="Age by Depression Severity and Legal Sex",
        xaxis_title="Depression Severity",
        yaxis_title="Age",
        xaxis=dict(tickmode='array', tickvals=violins['Depression Severity'].unique(), tickangle=20),
        yaxis=dict(range=[0, 80]),
        violingap=0.2,  # gap between violins
        violingroupgap=0.3,  # gap between groups
        violinmode='overlay',  # violins are drawn over each other
        font=dict(color='white', size=14),
        title_font=dict(size=20, color='white'),
        xaxis_tickfont=dict(size=14, color='white'),
        yaxis_tickfont=dict(size=14, color='white'),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        showlegend=True,
    )
    fig.update_layout(**layout_options)
    return fig


def feature_analytics(disease_data):
    """Split the correlated numeric features into vital-sign and other names.

    A feature is selected when its correlation with at least one numeric
    column exceeds 0.7. A column's correlation with itself counts, so any
    numeric column with a defined self-correlation is selected. The names
    'Weight', 'DiastolicBP', 'SystolicBP', 'ComponentValue', 'Height', 'Age'
    and 'BMI' are then removed.

    The result order is deterministic: features appear in the order in which
    they are first encountered while scanning the correlation matrix column
    by column.

    Parameters
    ----------
    disease_data : pandas.DataFrame
        Frame whose numeric columns are correlated (``numeric_only=True``).

    Returns
    -------
    tuple[list, list]
        ``(l, m)`` where ``l`` holds the selected names that are one of
        'PeakFlow', 'Temperature', 'Respiration', 'Pulse', 'SPO2' and ``m``
        holds all other selected names.
    """
    corr_threshold = 0.7
    excluded = {'Weight', 'DiastolicBP', 'SystolicBP', 'ComponentValue', 'Height', 'Age', 'BMI'}
    values_to_find = {'PeakFlow', 'Temperature', 'Respiration', 'Pulse', 'SPO2'}

    corrmat = disease_data.corr(numeric_only=True)

    # A dict is used as an ordered set: it de-duplicates while keeping
    # first-seen order, unlike set(), whose order varies with string hashing.
    selected = {}
    for column in corrmat.columns:
        for name in corrmat.index[corrmat[column] > corr_threshold]:
            selected.setdefault(name, None)

    l = []
    m = []
    for name in selected:
        if name in excluded:
            continue
        if name in values_to_find:
            l.append(name)
        else:
            m.append(name)
    return l, m



def chart_11(disease_data):
    """Scatter plot of age against the first feature from ``feature_analytics``.

    The x-axis feature is the first entry of the first list returned by
    ``feature_analytics``. Indexing an empty list raises IndexError, so at
    least one such feature must exist. Rows missing that feature, ``Age`` or
    ``LegalSex`` are dropped, points are coloured by ``LegalSex``, and a
    dashed red vertical line marks the feature's mean.

    Parameters
    ----------
    disease_data : pandas.DataFrame
        Needs ``Age`` and ``LegalSex`` plus the numeric feature columns.

    Returns
    -------
    plotly.graph_objects.Figure
    """
    import plotly.express as px

    lab_features, _other_features = feature_analytics(disease_data)
    top_lab_feature = lab_features[0]

    points = disease_data.dropna(subset=[top_lab_feature, 'Age', 'LegalSex'])

    fig = px.scatter(
        points,
        x=top_lab_feature,
        y="Age",
        color="LegalSex",
        color_discrete_sequence=px.colors.qualitative.Set2,
        title=f'Age group: {top_lab_feature}',
        labels={top_lab_feature: top_lab_feature, 'Age': 'Age'},
        size_max=200,
    )

    # Dashed red marker at the mean of the plotted feature.
    feature_mean = points[top_lab_feature].mean()
    fig.add_vline(x=feature_mean, line=dict(color='red', dash='dash'))

    white_tick = dict(size=14, color='white')
    white_axis_title = dict(size=16, color='white')
    fig.update_layout(
        title_font=dict(size=20, color='white'),
        xaxis_title_font=white_axis_title,
        yaxis_title_font=white_axis_title,
        xaxis=dict(tickangle=20, tickfont=white_tick),
        yaxis=dict(tickfont=white_tick, range=[0, 80]),
        plot_bgcolor='black',
        paper_bgcolor='black',
    )
    return fig




def chart_12(filtered_data):
    """Build a word-cloud figure from the distinct immunization names.

    The distinct, non-empty values of ``ImmunizationName`` are joined with
    spaces, commas are removed, and the text is passed to ``WordCloud``. The
    rendered image is drawn on a matplotlib figure that is returned to the
    caller, so it can be displayed (e.g. with ``st.pyplot``) rather than
    being built and discarded. ``plt.show()`` is no longer called.

    Parameters
    ----------
    filtered_data : pandas.DataFrame
        Needs the column ``ImmunizationName``.

    Returns
    -------
    matplotlib.figure.Figure or None
        The figure holding the word cloud, or ``None`` when there is no
        immunization text to draw.
    """
    # Figure is created directly instead of through pyplot so no figure is
    # registered in pyplot's global state on each call.
    from matplotlib.figure import Figure

    names = filtered_data['ImmunizationName'].dropna()
    # Sorted so the generated text does not depend on set iteration order.
    unique_names = sorted({name for name in names if name})

    text = ' '.join(unique_names).strip(', ').replace(',', '')
    if not text.strip():
        # WordCloud.generate needs at least one word.
        return None

    title = "Immunization Word Cloud"
    cloud = WordCloud(scale=3,
                      max_words=150,
                      colormap='RdYlGn',
                      mask=None,
                      background_color='white',
                      stopwords=None,
                      collocations=True,
                      contour_color='black',
                      contour_width=1).generate(text)

    fig = Figure()
    ax = fig.subplots()
    ax.imshow(cloud.to_array(), interpolation='bilinear')
    ax.axis('off')
    ax.set_title(title, fontsize=20)
    return fig



def mean_of_values(cell_value):
    """Return the mean of a comma-separated string of numbers.

    Parameters
    ----------
    cell_value : str, number or missing value
        A string such as ``"1,2,3"``. Blank tokens (e.g. from a trailing
        comma) and surrounding whitespace are ignored. A plain number is
        returned as a float.

    Returns
    -------
    float
        The arithmetic mean, or NaN when the cell is missing or holds no
        numeric token.

    Raises
    ------
    ValueError
        If a non-blank token cannot be converted to ``float``.
    """
    if cell_value is None:
        return np.nan
    # Already numeric (includes a float NaN, which stays NaN).
    if isinstance(cell_value, (int, float, np.number)):
        return float(cell_value)
    if pd.isna(cell_value):  # other missing markers such as pd.NA
        return np.nan

    tokens = (token.strip() for token in cell_value.split(','))
    values = [float(token) for token in tokens if token]
    if not values:
        return np.nan
    return sum(values) / len(values)

def plots(original_data):
    a = original_data.copy()
    st.subheader("Clustering Analysis")
    col1, col2 = st.columns(2)
    ## 1
    cluster_counts = a['cluster'].value_counts().reset_index()
    cluster_counts.columns = ['cluster', 'count']  # Rename columns
    fig_1 = px.bar(cluster_counts, y='cluster', x='count', 
                labels={'cluster': 'Cluster', 'count': 'Count'},
                text_auto=True,  # text_auto=True displays the count on top of the bars
                color='cluster',  # Assign different colors to each bar
                color_continuous_scale='plasma',  # Use the plasma color scale
                category_orders={'cluster': [0, 1, 2, 3, 4]},
                )  # Set the order of clusters

    custom_labels = {0: 'Cluster 0', 1: 'Cluster 1', 2: 'Cluster 2', 3: 'Cluster 3', 4: 'Cluster 4'}
    fig_1.update_yaxes(tickvals=[0, 1, 2, 3, 4], ticktext=list(custom_labels.values()))

    fig_1.update_layout(
                    title={'text': "Count of Data Points per Cluster", 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
                    yaxis_title='Cluster', xaxis_title='Count', 
                    xaxis=dict(showline=False, showgrid=False, zeroline=False, tickfont=dict(size=14, color='white')),
                    yaxis=dict(showline=False, showgrid=False, zeroline=False, tickfont=dict(size=14, color='white')),
                    title_font=dict(color='white', size=18),
                    # plot_bgcolor='black',  # Background color
                    # paper_bgcolor='black',  # Paper background color
                    title_x=0.5,  # Center the title
                    legend=dict(
                        font=dict(size=16, color='white'),
                        bgcolor='rgba(0,0,0,0)'
        ))
    col1.plotly_chart(fig_1,use_container_width=True)

    ## 2
    fig_2 = px.scatter(a, x='Age', y='BMI', 
                    color='cluster', 
                    title="Cluster's Profile Based On Age And BMI",
                    color_continuous_scale='plasma')  # Use the plasma color palette

    fig_2.update_layout(
        title={'text': "Cluster's Profile Based On Age And BMI", 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
        xaxis=dict(showgrid=False, showticklabels=False, zeroline=False),
        yaxis=dict(showgrid=False, showticklabels=False, zeroline=False),
        # plot_bgcolor='black',  # Background color
        # paper_bgcolor='black',  # Paper background color
        title_font=dict(color='white', size=18),  # Title font color and size
        margin=dict(l=20, r=20, t=40, b=20), # Set margins to make the plot more compact
        legend=dict(
            font=dict(size=16, color='white'),
            bgcolor='rgba(0,0,0,0)'
        )
    )
    fig_2.update_traces(marker=dict(size=12, line=dict(width=2, color='DarkSlateGrey')))

    col2.plotly_chart(fig_2,use_container_width=True)

    col3, col4 = st.columns(2)
    ## 3
    palette = ['#636EFA', '#EF553B']  # Adjust the colors as needed
    fig_3 = go.Figure()
    for sex in a['LegalSex'].unique():
        fig_3.add_trace(go.Box(
            y=a[a['LegalSex'] == sex]['cluster'],
            name=f'Legal Sex: {sex}',
            marker_color=palette.pop(0),  # Pop the first color from the palette
            boxmean=True
        ))
    fig_3.update_layout(
        title={'text':"Clusters Distribution by Legal Sex", 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
         title_font=dict(color='white', size=18),
        #  plot_bgcolor='black',
        # paper_bgcolor='black',
        xaxis=dict(showline=False, showgrid=False, zeroline=False, tickfont=dict(size=14, color='white')),
        yaxis=dict(showline=False, showgrid=False, zeroline=False, tickfont=dict(size=14, color='white')),
        # plot_bgcolor='rgba(0,0,0,0)',
        # paper_bgcolor='rgba(0,0,0,0)',
        title_font_color='white',
        showlegend=True,
        legend=dict(
            font=dict(size=16, color='white'),
            bgcolor='rgba(0,0,0,0)'
        )
    )

    col3.plotly_chart(fig_3,use_container_width=True)

    ## 4
    # palette = ['#636EFA', '#EF553B', '#00CC96', '#AB63FA', '#FFA15A']  # Example palette
    fig_4 = px.violin(
        a,
        x="BP Severity",
        y="cluster",
        color="BP Severity",
        color_discrete_sequence=px.colors.qualitative.Vivid,
        box=True,  # Adds a box plot inside the violin plot for more detail
        points="all",  # Shows all data points
        title="Clusters Distribution by BP Severity"
    )
    fig_4.update_layout(
        title={'text':"Clusters Distribution by BP Severity", 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
        title_font=dict(color='white', size=18),
        xaxis_title="BP Severity",
        yaxis_title="Cluster",
        #  plot_bgcolor='black',
        # paper_bgcolor='black',
        xaxis_title_font=dict(size=16, color='white'),
        yaxis_title_font=dict(size=16, color='white'),
        xaxis=dict(showline=False, showgrid=False, zeroline=False, tickfont=dict(size=14, color='white')),
        yaxis=dict(showline=False, showgrid=False, zeroline=False, tickfont=dict(size=14, color='white')),
        title_font_color='white',
        legend=dict(
            font=dict(size=16, color='white'),
            bgcolor='rgba(0,0,0,0)'
        )
    )

    fig_4.update_xaxes(tickangle=45)  # Rotate the x-axis labels for better readability
    
    col4.plotly_chart(fig_4,use_container_width=True)

    col5, col6 = st.columns(2)
    ## 5
    fig_5 = px.histogram(a, x="Depression Severity", color="cluster",
                    color_discrete_sequence=px.colors.diverging.RdYlBu,
                    title='Clusters Distribution by Depression Severity')

    # Update layout to make it more attractive
    fig_5.update_layout(
        title={'text':"Clusters Distribution by Depression Severity", 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
        title_font=dict(color='white', size=18),
        # plot_bgcolor='black',
        # paper_bgcolor='black',
        title_font_color='white',
        xaxis_title='Depression Severity',
        yaxis_title='Count',
        xaxis_title_font_color='white',
        yaxis_title_font_color='white',
        legend=dict(
            font=dict(size=16, color='white'),
            bgcolor='rgba(0,0,0,0)'
        ),
        xaxis=dict(
            tickfont=dict(color='white', size=14),
            title_font=dict(color='white', size=16),
            showline=False,
            showgrid=False,
            ticks=''
        ),
        yaxis=dict(
            tickfont=dict(color='white', size=14),
            title_font=dict(color='white', size=16),
            showline=False,
            showgrid=False,
            ticks=''
        ),
        coloraxis_colorbar=dict(
            tickfont=dict(color='white')
        )
    )

    # Show the plot
    col5.plotly_chart(fig_5,use_container_width=True)

    ## 6
    fig_6 = px.violin(a, y="cluster", x="Temp_condition", box=True, points="all",
                    color="Temp_condition", color_discrete_sequence=px.colors.diverging.RdYlBu,
                    title='Clusters Distribution by Temp_condition')

    # Update layout to make it more attractive
    fig_6.update_layout(
        title={'text':"Clusters Distribution by Temp_condition", 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
        title_font=dict(color='white', size=18),
        # plot_bgcolor='black',
        # paper_bgcolor='black',
        title_font_color='white',
        xaxis_title='Temp_condition',
        yaxis_title='Clusters',
        xaxis_title_font_color='white',
        yaxis_title_font_color='white',
        legend=dict(
            font=dict(size=16, color='white'),
            bgcolor='rgba(0,0,0,0)'
        ),
        xaxis=dict(
            tickfont=dict(color='white', size=14),
            title_font=dict(color='white', size=16),
            showline=False,
            showgrid=False,
            ticks=''
        ),
        yaxis=dict(
            tickfont=dict(color='white', size=14),
            title_font=dict(color='white', size=16),
            showline=False,
            showgrid=False,
            ticks=''
        ),
        coloraxis_colorbar=dict(
            tickfont=dict(color='white')
        )
    )

    # Show the plot
    col6.plotly_chart(fig_6,use_container_width=True)

    col7, col8 = st.columns(2)

    ##7
    # Create the stacked bar chart
    ad = a.groupby(['weight_condition', 'cluster']).size().reset_index(name='count')

    fig_7 = px.bar(ad, 
                x='weight_condition', 
                y='count', 
                color='cluster', 
                title='Clusters Distribution by Weight Condition',
                text='count',
                barmode='stack',
                color_discrete_sequence=px.colors.diverging.RdYlBu)  # Use a color scale or palette of your choice

    # Update layout to make it more attractive and remove axes elements
    fig_7.update_layout(
        title={'text': 'Clusters Distribution by Weight Condition', 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
        title_font=dict(color='white', size=18),
        xaxis=dict(
            title='',  # Remove x-axis title
            showline=False,
            showgrid=False,
            zeroline=False,
            tickfont=dict(size=14, color='white'),
            tickangle=45  # Rotate x-axis labels for better readability
        ),
        yaxis=dict(
            title='',  # Remove y-axis title
            showline=False,
            showgrid=False,
            zeroline=False,
            tickfont=dict(size=14, color='white')
        ),
        # plot_bgcolor='black',  # Background color
        # paper_bgcolor='black',  # Paper background color
        margin=dict(l=20, r=20, t=40, b=20),  # Set margins to make the plot more compact
        legend=dict(
            font=dict(size=16, color='white'),
            bgcolor='rgba(0,0,0,0)'
        )
    )

    # Update bar text style
    fig_7.update_traces(texttemplate='%{text:.2s}', textfont_size=14, textposition='inside', marker=dict(line=dict(width=1, color='DarkSlateGrey')))

    # Show the plot
    col7.plotly_chart(fig_7,use_container_width=True)


    ## 8
    fig_8 = px.box(a, 
             x='SPO2_condition', 
             y='Age', 
             points='all',  # Show all points
             title="Clusters Distribution by SPO2_condition",
             color='cluster',
             color_discrete_sequence=px.colors.sequential.Plasma_r)

    # Update layout to remove axes titles, labels, and gridlines, and style the chart
    fig_8.update_layout(
        title={'text': "Clusters Distribution by SPO2_condition", 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
        title_font=dict(color='white', size=18),
        xaxis=dict(showline=False, showgrid=False, zeroline=False, tickfont=dict(size=14, color='white')),
        yaxis=dict(showline=False, showgrid=False, zeroline=False, tickfont=dict(size=14, color='white')),
        # plot_bgcolor='black',  # Background color
        # paper_bgcolor='black',  # Paper background color
        margin=dict(l=20, r=20, t=40, b=20),  # Set margins to make the plot more compact
        legend=dict(
            font=dict(size=16, color='white'),
            bgcolor='rgba(0,0,0,0)'
        )
    )

    # Customize the boxen plot appearance
    fig_8.update_traces(
        boxmean=True,  # Add mean line
        jitter=0.3,  # Spread points along x-axis
        marker=dict(size=10, line=dict(width=2, color='DarkSlateGrey'))
    )

    # Show the plot
    col8.plotly_chart(fig_8,use_container_width=True)

    col_11 = st.columns(1)[0]
    fig_11 = px.scatter_matrix(
    a[['Age', 'SystolicBP', 'Pulse', 'Weight', 'BMI', 'cluster']],
    dimensions=['Age', 'SystolicBP', 'Pulse', 'Weight', 'BMI'],
    color='cluster',
    title="Scatter Matrix of Selected Features by Cluster",
    labels={col: col for col in ['Age', 'SystolicBP', 'Pulse', 'Weight', 'BMI']},
    color_continuous_scale= px.colors.diverging.Spectral
    )

    # Update layout for better visualization
    fig_11.update_traces(diagonal_visible=True)
    fig_11.update_layout(height=700, width=700, showlegend=True)

    # Show the plot
    col_11.plotly_chart(fig_11,use_container_width=True)
    #

    ##### Joint Plot
    st.subheader("Summary")
    meanvalue_columns = [col for col in list(a.columns) if 'meanvalue' in col]
        # Group data by clusters
    grouped_data = a.groupby('cluster')

    # Calculate mean for numerical columns
    numerical_columns = a.select_dtypes(include=['number']).columns
    numerical_summary = grouped_data[numerical_columns].mean()

    # Calculate mode for categorical columns
    categorical_columns = a.select_dtypes(include=['object', 'category','string']).columns
    categorical_summary = grouped_data[categorical_columns].agg(lambda x: x.value_counts().index[0])

    for i in range(len(a['cluster'].value_counts())):
    # Example for Cluster 0
        cluster_traits = {
            "Age": numerical_summary.loc[i, 'Age'],
            "Age_Category": categorical_summary.loc[i,"Age_Category"],
            "SystolicBP": numerical_summary.loc[i, 'SystolicBP'],
            "Depression Severity": categorical_summary.loc[i, 'Depression Severity'],
            "Weight Condition" : categorical_summary.loc[i, 'weight_condition'],
            "BP Severity" : categorical_summary.loc[i, 'BP Severity'],
            "Pulse_condition" : categorical_summary.loc[i, 'Pulse_condition'],
            "Respiration_condition" : categorical_summary.loc[i, 'Respiration_condition'],
            "SPO2_condition" :  categorical_summary.loc[i, 'SPO2_condition'],

        }

        # if numerical_summary.loc[i, 'GLUCOSE_meanvalue'] > 100:
        #     glucose_condition = "High frequency of patients with slightly elevated glucose levels."
        # else:
        #     glucose_condition = "Normal glucose levels."


        
        # Writing the summary
        summary = f"""
        Cluster - {i} Traits
        1. Age: Average age is {round(cluster_traits['Age'])} years.
        2. SystolicBP: Patients tend to have slightly elevated systolic blood pressure, averaging {cluster_traits['SystolicBP']} mmHg.
        3. Depression Severity: Predominantly '{cluster_traits['Depression Severity']}'.
        4. "Weight Condition" : {cluster_traits['Weight Condition']}.
        5. "Respiration_condition" : {cluster_traits['Respiration_condition']}.
        6. "Pulse_condition" : {cluster_traits['Pulse_condition']}.
        7. "SPO2_condition" : {cluster_traits['SPO2_condition']}.

        Trait Summary: Cluster {i} mainly consists of {cluster_traits['Age_Category']} individuals with {cluster_traits['Depression Severity']} depression level, {cluster_traits['BP Severity'].lower()}.
        """

        st.write(summary)
    st.write(round(numerical_summary[meanvalue_columns],2))
    
    st.subheader("Density Contour Plot")    
    with st.container():
    # Loop through the columns and create plots
        for i in meanvalue_columns:
            fig = px.density_contour(
                a,  # Replace 'a' with your actual DataFrame name
                y="Age",
                x=i,
                color="cluster",
                marginal_x="histogram",
                marginal_y="histogram",
                template="simple_white",
                color_discrete_sequence=px.colors.qualitative.Set1
            )

            # Add fill to the contours for a similar effect to kde
            fig.update_traces(bingroup="fill")

            # Update layout for better aesthetics
            fig.update_layout(
                title=f"Joint Density Contour of {i} vs Age by Clusters",
                yaxis_title="Age",
                xaxis_title=i,
                xaxis=dict(
                    title=i,
                    showline=False,
                    showgrid=False,
                    zeroline=False,
                    tickfont=dict(size=14, color='white'),
                    tickangle=45,  # Rotate x-axis labels for better readability
                    titlefont=dict(size=16, color='white')  # Set x-axis title to white
                ),
                yaxis=dict(
                    title='Age',
                    showline=False,
                    showgrid=False,
                    zeroline=False,
                    tickfont=dict(size=14, color='white'),
                    titlefont=dict(size=16, color='white')  # Set y-axis title to white
                ),
                plot_bgcolor='black',
                paper_bgcolor='black',
                title_font_color='white',
                legend_title="Clusters",
                width=1500,  # Adjust width as needed
                height=800   # Increase height to make the plot taller
            )

            # Display the plot using st.plotly_chart within a column
            st.plotly_chart(fig, use_container_width=True)


def ML(filtered_data, scaler, unscaled_data):
    """Cluster patient records with K-Prototypes and render the results.

    Rows containing missing values are dropped, the identifier columns are
    removed, and the remaining mixed numeric/categorical data is clustered.
    The numeric features are then mapped back through ``scaler``, projected
    with PCA for a 3D scatter plot, and finally passed to ``plots`` together
    with the categorical columns and the cluster labels.

    Args:
        filtered_data: DataFrame containing ``PatientID``, ``VisitID`` and the
            feature columns. Its numeric features are passed to
            ``scaler.inverse_transform``, so they must be in the space the
            scaler was fitted on.
        scaler: Fitted scaler used to restore the numeric features.
        unscaled_data: Not used by this function; kept so existing callers
            keep working.

    Returns:
        The return value of ``plots``, or ``None`` when no complete rows
        remain after dropping missing values.
    """
    man = filtered_data.dropna().drop(columns=['PatientID', 'VisitID'])
    if man.empty:
        # Nothing left to cluster; bail out instead of crashing in the fit.
        st.warning("No complete records are available for clustering.")
        return None

    numerical_columns = list(man.select_dtypes(include=['int', 'float']).columns)
    categorial_columns = list(man.select_dtypes(exclude=['int', 'float', 'datetime']).columns)
    # KPrototypes is told which features are categorical by column position.
    categorical_indexes = [man.columns.get_loc(c) for c in categorial_columns]

    # Pick the number of clusters from the number of available rows.
    n_rows = man.shape[0]
    if 5 < n_rows < 10:
        ki = 3
    elif n_rows <= 4:
        ki = 1
    else:
        # NOTE(review): exactly 5 rows also ends up here (4 clusters) — confirm intended.
        ki = 4
    kproto = KPrototypes(n_clusters=ki, init='Huang', n_init=25, random_state=42)
    kproto.fit_predict(man, categorical=categorical_indexes)
    cluster_labels = kproto.labels_

    original_numeric_data = scaler.inverse_transform(man[numerical_columns])

    # Convert back to DataFrame and add cluster labels
    original_data = pd.DataFrame(original_numeric_data, columns=numerical_columns)
    original_data["cluster"] = cluster_labels
    original_data["cluster"] = original_data["cluster"].astype('category')

    ## PCA Graph
    features = original_data[numerical_columns]
    # PCA cannot extract more components than there are samples or features,
    # so cap the requested number instead of always asking for 4.
    n_components = min(4, features.shape[0], features.shape[1])

    st.subheader("PCA")
    if n_components >= 3:
        pca = PCA(n_components=n_components)
        projected = pca.fit_transform(features)
        # The projected axes are principal components, not original features,
        # so they are named PC1..PCn rather than after the input columns.
        pc_names = [f"PC{k + 1}" for k in range(n_components)]
        pca_df = pd.DataFrame(projected, columns=pc_names)

        fig_9 = go.Figure(
            go.Scatter3d(
                mode='markers',
                x=pca_df.iloc[:, 0],
                y=pca_df.iloc[:, 1],
                z=pca_df.iloc[:, 2],
                marker=dict(size=4, color=original_data['cluster'], colorscale='spectral'),
            )
        )

        fig_9.update_layout(
            scene=dict(
                xaxis_title=pc_names[0],
                yaxis_title=pc_names[1],
                zaxis_title=pc_names[2],
                xaxis=dict(color='white'),  # Axis label color
                yaxis=dict(color='white'),
                zaxis=dict(color='white'),
            ),
        )
        col9 = st.columns(1)[0]
        col9.plotly_chart(fig_9, use_container_width=True)
    else:
        # A 3D scatter needs three components; skip the chart but keep going.
        st.info("Not enough samples or numeric features for a 3D PCA projection.")

    # Re-attach the categorical columns; both frames get a fresh positional
    # index so the column-wise concat lines up row for row.
    categorical_part = man[categorial_columns].reset_index(drop=True)
    original_data = pd.concat([original_data.reset_index(drop=True), categorical_part], axis=1)

    return plots(original_data)



def imputer(filtered_data):
    """Impute and standardise the numeric features, then run the clustering step.

    The numeric columns (except the first two) are filled with
    ``IterativeImputer``, rounded to two decimals and standardised with
    ``StandardScaler``. The processed columns are moved to the end of the
    frame, dtypes are converted with ``convert_dtypes`` and the result is
    passed to ``ML`` together with the fitted scaler.

    Args:
        filtered_data: DataFrame with identifier, numeric and categorical
            columns.

    Returns:
        The return value of ``ML``.
    """
    # The first two numeric columns are skipped by position.
    # NOTE(review): presumably these are PatientID and VisitID — confirm.
    numeric_columns = filtered_data.select_dtypes(include=['int', 'float']).iloc[:, 2:].copy()

    # Setting the random_state argument for reproducibility
    iterative_imputer = IterativeImputer(random_state=42)
    imputed = iterative_imputer.fit_transform(numeric_columns)
    # Reuse the source index so the column-wise concat below aligns row for
    # row even when filtered_data does not carry a default RangeIndex.
    Imputed_data = pd.DataFrame(
        imputed,
        columns=numeric_columns.columns,
        index=numeric_columns.index,
    ).round(2)
    filtered_data = filtered_data.drop(columns=Imputed_data.columns)
    Ml_data = pd.concat([filtered_data, Imputed_data], axis=1)
    unscaled_data = Ml_data.copy()

    ## Scaling
    scaled_data = Ml_data.select_dtypes(include=['int', 'float']).iloc[:, 2:].copy()
    scaler = StandardScaler()
    scaled_data = pd.DataFrame(
        scaler.fit_transform(scaled_data),
        columns=scaled_data.columns,
        index=scaled_data.index,
    )
    Ml_data = Ml_data.drop(columns=scaled_data.columns)
    Ml_data = pd.concat([Ml_data, scaled_data], axis=1)
    Ml_data = Ml_data.convert_dtypes()
    return ML(Ml_data, scaler, unscaled_data)


# Parquet file that backs the "Machine Learning" page.
filename_1 = "ML_DATA.parquet"

# Hugging Face access token, read from the environment.
token = os.environ["HUGGING_FACE_HUB_TOKEN"]

# Fetch the file from the dataset repository and keep its local path.
local_file_1 = hf_hub_download(
    repo_id=repo_id,
    filename=filename_1,
    repo_type="dataset",
    token=token,
)

@st.cache_data()
def fetch_data_1():
    """Read the downloaded machine-learning parquet file into a DataFrame.

    Wrapped in ``st.cache_data`` so Streamlit can reuse the loaded frame.
    """
    return pd.read_parquet(local_file_1)



if analysis_option == 'Machine Learning':
    # Machine-learning page: pick a disease, derive one "<lab>_meanvalue"
    # feature per key lab component, then hand the frame to imputer().
    data = fetch_data_1()
    problem = list(data['Description'].unique())
    st.subheader("_Select Disease_:sunglasses:")
    health_option = st.selectbox("_Select Disease_:sunglasses:",['', *problem], label_visibility="collapsed")
    filtered_data = data[data['Description'] == health_option].copy()
    if filtered_data['key_lab2'].notna().any():
        # Take the first NON-NULL entry: the guard above only guarantees that
        # some row has a value, not that the first row does.
        key_labs = list(filtered_data['key_lab2'].dropna().iloc[0])
        column_list = ['PatientID', 'VisitID', 'GroupedICD'] + key_labs
        # One column per lab component; multiple readings are joined as text.
        pivot_data = pd.pivot_table(filtered_data, values='ComponentValue', index=['PatientID', 'VisitID', 'GroupedICD'], columns='ComponentName', aggfunc=lambda x: ', '.join(map(str, x)))
        pivot_data = pivot_data.reset_index(drop=False)
        pivot_data = pivot_data[column_list].copy()
        filtered_data = pd.merge(filtered_data, pivot_data, on=['PatientID', 'VisitID','GroupedICD'], how='left')

        # NOTE(review): the width of 20 is hard-coded while the number of lab
        # columns is dynamic — confirm this slice is still what is intended.
        filtered_data.iloc[:, -20:] = filtered_data.iloc[:, -20:].convert_dtypes()
        hmm = pd.DataFrame()
        num_columns = len(key_labs)
        # The merged lab columns are read from the end of the frame by position.
        for i in range(1, num_columns+1):
            existing_column = filtered_data.columns[-i]
            new_column_name = f'{existing_column}_meanvalue'
            hmm[new_column_name] = filtered_data[existing_column].apply(mean_of_values)
        filtered_data = pd.concat([filtered_data, hmm], axis=1)
        column_list = [
            ## Necessary columns
            'PatientID', 'VisitID', 'GroupedICD',

            ## Numerical values
            'Age', 'SystolicBP',
            'DiastolicBP','Temperature',
            'Pulse', 'Weight', 'Height', 'BMI', 'Respiration',
            'SPO2', 'PHQ_9Score',

            ## Categorial Values
            'LegalSex','BPLocation', 'BPPosition', 'PregnancyStatus', 'LactationStatus', 'TemperatureSource',
            'Age_Category','BP Severity','Depression Severity','weight_condition', 'Temp_condition', 'Pulse_condition',
            'Respiration_condition', 'SPO2_condition', 'PeakF_condition']
        last = list(hmm.columns)
        required_columns = column_list + last
        filtered_data = filtered_data[required_columns].copy()
        filtered_data = filtered_data.drop_duplicates().reset_index(drop=True)
        # Columns without a single value cannot be imputed, so drop them first.
        filtered_data = filtered_data.dropna(axis=1, how='all')
        imputer(filtered_data)































































if analysis_option == 'Data':
    # Data overview page: age filter, patient counts by sex, overview charts
    # and a preview of the filtered rows.
    age_min = int(data['Age'].min())
    age_max = int(data['Age'].max())
    age_range = st.sidebar.slider('Select Age Range', age_min, age_max, (age_min, age_max))
    data = data[(data['Age'] >= age_range[0]) & (data['Age'] <= age_range[1])].copy()

    Sex = data.groupby('LegalSex')['PatientID'].nunique().reset_index(name='count')
    st.subheader("Distribution of Patient's by Sex", divider='rainbow')
    col1, col2,col3 = st.columns(3)
    # Select the counts by sex value and fall back to 0 when a sex is absent.
    # Indexing the result by a fixed row label breaks as soon as the age
    # filter removes one sex or another category is present.
    male_count = Sex.loc[Sex['LegalSex'] == 'Male', 'count']
    female_count = Sex.loc[Sex['LegalSex'] == 'Female', 'count']
    col1.metric(label="Male", value=male_count.iloc[0] if not male_count.empty else 0)
    col2.metric(label="Female", value=female_count.iloc[0] if not female_count.empty else 0)
    col4, col5 = st.columns(2)
    fig2 = funnel_chart(data)
    col4.plotly_chart(fig2, use_container_width=True)
    fig = scatterplot(data)
    col5.plotly_chart(fig, use_container_width=True)
    col6 = st.columns(1)[0]
    fig_man = scatter_man(data)
    col6.plotly_chart(fig_man, use_container_width=True)

    st.dataframe(data.head(20).style.format({'PatientID': "{:.0f}"}))

if analysis_option == 'EDA':
    # EDA page: age filter, disease selection, then a grid of charts for the
    # selected disease followed by a preview of its rows.
    age_min = int(data['Age'].min())
    age_max = int(data['Age'].max())
    age_range = st.sidebar.slider('Select Age Range', age_min, age_max, (age_min, age_max))
    data = data[(data['Age'] >= age_range[0]) & (data['Age'] <= age_range[1])].copy()

    problem = list(data['Description'].unique())
    st.subheader("_Select Disease_:sunglasses:")
    health_option = st.selectbox("_Select Disease_:sunglasses:",['', *problem], label_visibility="collapsed")
    if health_option in problem:
        health_data = data[data['Description'] == health_option].copy()
        Sex = health_data.groupby('LegalSex')['PatientID'].nunique().reset_index(name='count')
        st.subheader(f"Patients for '{health_option}' by Sex", divider='rainbow')
        col1, col2, col3 = st.columns(3)
        if 'Male' in Sex['LegalSex'].values:
            col1.metric(label="Male", value=Sex[Sex['LegalSex'] == 'Male']['count'].iloc[0])
        else:
            col1.metric(label="Male", value=0)
        if 'Female' in Sex['LegalSex'].values:
            col2.metric(label="Female", value=Sex[Sex['LegalSex'] == 'Female']['count'].iloc[0])
        else:
            # The zero-count fallback must keep the "Female" label.
            col2.metric(label="Female", value=0)

        # Row 1: funnel chart and lab bar plot
        col4, col5 = st.columns(2)
        fig2 = funnel_chart(health_data)
        col4.plotly_chart(fig2, use_container_width=True)

        fig3 = barplot_lab(health_data)
        col5.plotly_chart(fig3, use_container_width=True)

        # Row 2
        col6, col7 = st.columns(2)
        fig4 = histplot_6(health_data)
        col6.plotly_chart(fig4, use_container_width=True)

        fig5 = histplot_7(health_data)
        col7.plotly_chart(fig5, use_container_width=True)

        # Row 3
        col8, col9 = st.columns(2)
        fig6 = pie_chart_7(health_data)
        col8.plotly_chart(fig6, use_container_width=True)

        fig7 = chart_8(health_data)
        col9.plotly_chart(fig7, use_container_width=True)

        # Row 4
        col10, col11 = st.columns(2)
        fig8 = chart_9(health_data)
        col10.plotly_chart(fig8, use_container_width=True)

        fig9 = chart_10(health_data)
        col11.plotly_chart(fig9, use_container_width=True)

        # Row 5: only the left half is used, which keeps the chart half-width.
        col12, col13 = st.columns(2)
        fig10 = chart_11(health_data)
        col12.plotly_chart(fig10, use_container_width=True)

        st.dataframe(health_data.head(20).style.format({'PatientID': "{:.0f}"}))





# Initialize Google Gemini or any other Google API client using the key


if analysis_option == 'Health Care Chat Bot AI':
    # Chat page: forward a natural-language prompt to PandasAI (Google Gemini)
    # and show the answer.
    google_api_key = os.environ.get("google_key")
    llm = GoogleGemini(api_key=google_api_key)
    # Two wrappers over the same data: one configured with the
    # StreamlitResponse parser, one with the default parser whose result is
    # written out below.
    pandas_ai = SmartDataframe(data, config={"llm": llm, "response_parser": StreamlitResponse,"verbose": True})
    pandas_ai_2 = SmartDataframe(data, config={"llm": llm,"verbose": True})   ## string
    # Streamlit app title and description
    st.title("AI-Powered Data Analysis App")
    st.write("This application allows you to interact with your dataset using natural language prompts. Just ask a question, and the AI will provide insights based on your data.")

    # Display the dataset
    st.subheader("Dataset Preview")
    st.dataframe(data.head())

    # User input for natural language prompt
    prompt = st.text_input("Enter your prompt:", placeholder="e.g., What are the top diagnoses?")

    # Process the input and display the result
    if st.button("Submit"):
        # Match the keywords case-insensitively so prompts such as
        # "Plot ..." or "GRAPH ..." are also routed to the plotting parser.
        lowered_prompt = prompt.lower()
        wants_plot = any(keyword in lowered_prompt for keyword in ('plot', 'graph'))
        if wants_plot:
            try:
                result = pandas_ai.chat(prompt)
                st.subheader("Result")
            except KeyError as e:
                st.error(f"Error: {e}. Unable to retrieve result.")
        elif prompt:
            try:
                result = pandas_ai_2.chat(prompt)
                st.subheader("Result")
                st.write(result)
            except KeyError as e:
                st.error(f"Error: {e}. Unable to retrieve result.")
        else:
            st.warning("Please enter a prompt.")

    # Add a footer
    st.write("Powered by PandasAI and Google Gemini.")