Spaces:

Akankshg
/

Healthcare-PHM

Running

App Files Files Community

Healthcare-PHM / app.py

Akankshg

Update app.py

7f7384d verified 10 months ago

raw

history blame

47.9 kB

	#pip install streamlit wordcloud
	import streamlit as st
	import pandas as pd
	import matplotlib.pyplot as plt
	import plotly.express as px
	import plotly.figure_factory as ff
	import warnings
	warnings.filterwarnings("ignore")
	from wordcloud import WordCloud
	from sklearn.preprocessing import StandardScaler
	import numpy as np
	from sklearn.preprocessing import LabelEncoder
	from pandasai import SmartDataframe
	from pandasai.llm.google_gemini import GoogleGemini
	import warnings
	from pandasai.responses.response_parser import ResponseParser
	# pip install wordcloud
	# !pip install kmodes

	from sklearn.decomposition import PCA
	from sklearn.experimental import enable_iterative_imputer
	from sklearn.impute import IterativeImputer
	from kmodes.kprototypes import KPrototypes
	import plotly.graph_objects as go
	import streamlit as st
	#pip install google-generativeai


	import os
	from huggingface_hub import hf_hub_download

	# Location of the parquet file inside the Hugging Face dataset repository.
	repo_id = "Akankshg/ML_DATA"
	filename = "EDA_DATA.parquet"

	# The access token is read from the environment; a missing variable raises KeyError.
	token = os.environ["HUGGING_FACE_HUB_TOKEN"]

	# Fetch the file from the Hub and keep the path returned by hf_hub_download.
	local_file = hf_hub_download(
	    repo_id=repo_id,
	    filename=filename,
	    repo_type="dataset",
	    token=token,
	)



	class StreamlitResponse(ResponseParser):
	    """Response parser that shows dataframe and plot results through Streamlit."""

	    def __init__(self, context) -> None:
	        super().__init__(context)

	    def format_dataframe(self, result):
	        """Display ``result["value"]`` with ``st.dataframe``; returns None."""
	        st.dataframe(result["value"])

	    def format_plot(self, result):
	        """Display ``result["value"]`` with ``st.image``; returns None."""
	        st.image(result["value"])


	# Page configuration, dashboard title, and a CSS tweak that shrinks the top padding.
	st.set_page_config(
	    page_title="Healthcare Data Analysis",
	    page_icon=":bar_chart:",
	    layout="wide",
	)
	st.title(':bar_chart: Healthcare Data Analysis Dashboard')
	st.markdown(
	    '<style>div.block-container{padding-top:1rem;}</style>',
	    unsafe_allow_html=True,
	)

	# Sidebar: the selected entry is stored in ``analysis_option``.
	st.sidebar.title('Dashboard Options')
	analysis_option = st.sidebar.selectbox(
	    'Select Analysis',
	    ['Data', 'EDA', 'Machine Learning', 'Health Care Chat Bot AI'],
	)

	## Loading data
	@st.cache_data()
	def fetch_data():
	    """Read the downloaded parquet file (``local_file``) into a DataFrame.

	    Wrapped in ``st.cache_data`` so the result is cached by Streamlit.
	    """
	    return pd.read_parquet(local_file)


	data = fetch_data()

	def funnel_chart(df):
	    """Build a funnel chart of visit counts per weekday, split by ``LegalSex``.

	    Args:
	        df: DataFrame with ``PatientID``, ``EncounterDate`` (datetime) and
	            ``LegalSex`` columns.

	    Returns:
	        A Plotly figure. Only the ``'Male'`` and ``'Female'`` values of
	        ``LegalSex`` are plotted; when neither is present an empty figure
	        with the same title and layout is returned instead of raising.
	    """
	    weekday_order = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
	    colors = {'Male': '#2986cc', 'Female': '#c90076'}
	    title = 'Patient Visits by Gender and Weekday'
	    layout = dict(
	        template="plotly_dark",
	        xaxis_title='Number of Patients',
	        yaxis_title='Weekday',
	        height=500,
	        width=250,
	    )

	    visits = df[['PatientID', 'EncounterDate', 'LegalSex']].copy()
	    visits['WeekDay'] = visits['EncounterDate'].dt.day_name().astype('string')

	    # One row per weekday (Monday..Sunday), one column per LegalSex value.
	    # fill_value=0 makes weekdays without any visit show 0 instead of NaN.
	    counts = (
	        visits.groupby(['WeekDay', 'LegalSex'])
	        .size()
	        .unstack(fill_value=0)
	        .reindex(weekday_order, fill_value=0)
	        .reset_index()
	    )

	    # Male first, then Female, matching the trace order of the original chart.
	    frames = [
	        pd.DataFrame({'number': counts[gender], 'stage': counts['WeekDay'], 'Gender': gender})
	        for gender in ('Male', 'Female')
	        if gender in counts.columns
	    ]

	    if not frames:
	        # Neither gender column exists: return an empty, consistently styled figure.
	        empty_fig = go.Figure()
	        empty_fig.update_layout(title=title, **layout)
	        return empty_fig

	    fig2 = px.funnel(
	        pd.concat(frames, axis=0),
	        x='number',
	        y='stage',
	        color='Gender',
	        color_discrete_map=colors,
	        title=title,
	    )
	    fig2.update_layout(**layout)
	    return fig2

	def scatter_man(data):
	    """Bubble chart of how often each diagnosis occurs at each age.

	    Rows lacking ``GroupedICD`` or ``Age`` are discarded, the remainder is
	    counted per (Age, GroupedICD, Description), and only combinations with
	    more than 1000 rows are plotted.

	    Returns:
	        A Plotly scatter figure with ``Description`` on x, ``Age`` on y and
	        marker size/colour driven by the count.
	    """
	    diagnoses = data[['PatientID', 'GroupedICD', 'Description', 'Age']].copy()
	    diagnoses = diagnoses.dropna(subset=['GroupedICD', 'Age'])

	    icd_counts = (
	        diagnoses.groupby(['Age', 'GroupedICD', 'Description'])
	        .size()
	        .reset_index(name='Count')
	    )
	    frequent = icd_counts[icd_counts['Count'] > 1000]

	    fig5 = px.scatter(
	        frequent,
	        y='Age',
	        x='Description',
	        size='Count',
	        hover_name='Age',
	        color='Count',
	        title='Age - ICD Relationship',
	        color_continuous_scale='ylorrd',
	    )
	    fig5.update_layout(
	        template="plotly_dark",
	        xaxis_title='ICD Code',
	        yaxis_title='Age',
	        coloraxis_colorbar=dict(title='Count'),
	        height=950,
	        width=1400,
	    )
	    return fig5


	def barplot_lab(df):
	    """Bar chart of the 20 most frequent (lab component, description) pairs.

	    Only rows whose encounter happened at most 7 days after the same
	    patient's previous encounter are counted, and rows without a
	    ``ComponentName`` or ``GroupedICD`` are ignored.

	    Returns:
	        A Plotly bar figure with ``ComponentName`` on x and the count on y.
	    """
	    visits = df[['PatientID', 'EncounterDate', 'ComponentName', 'GroupedICD', 'Description']].copy()
	    visits = visits.sort_values(by=['EncounterDate'], ascending=True)

	    # Days elapsed since the same patient's previous (earlier) encounter.
	    visits['DaysSinceLastVisit'] = visits.groupby('PatientID')['EncounterDate'].diff().dt.days
	    recent = visits[visits['DaysSinceLastVisit'] <= 7]

	    labs = recent.dropna(subset=['ComponentName', 'GroupedICD'])
	    pair_counts = labs.groupby(['ComponentName', 'Description']).size().reset_index(name='Count')
	    top_pairs = pair_counts.sort_values(by='Count', ascending=False).head(20)

	    fig3 = px.bar(
	        top_pairs,
	        x='ComponentName',
	        y='Count',
	        hover_data=['ComponentName', 'Count'],
	        color='ComponentName',
	        height=450,
	        title='Lab Test',
	    )
	    fig3.update_xaxes(tickangle=45)
	    return fig3

	def scatterplot(df, min_count=2000):
	    """Bubble chart relating lab components to diagnosis descriptions.

	    Only rows whose encounter happened at most 7 days after the same
	    patient's previous encounter are counted, and rows without a
	    ``ComponentName`` or ``GroupedICD`` are ignored.

	    Args:
	        df: DataFrame with ``PatientID``, ``EncounterDate``,
	            ``ComponentName``, ``GroupedICD`` and ``Description`` columns.
	        min_count: A (component, description) pair is plotted only when it
	            occurs more than this many times. Defaults to 2000, the value
	            that used to be hard-coded.

	    Returns:
	        A Plotly scatter figure with ``Description`` on x, ``ComponentName``
	        on y and marker size/colour driven by the count.
	    """
	    visits = df[['PatientID', 'EncounterDate', 'ComponentName', 'GroupedICD', 'Description']].copy()
	    visits = visits.sort_values(by=['EncounterDate'], ascending=True)

	    # Days elapsed since the same patient's previous (earlier) encounter.
	    visits['DaysSinceLastVisit'] = visits.groupby('PatientID')['EncounterDate'].diff().dt.days
	    recent = visits[visits['DaysSinceLastVisit'] <= 7]

	    labs = recent.dropna(subset=['ComponentName', 'GroupedICD'])
	    component = labs.groupby(['ComponentName', 'Description']).size().reset_index(name='Count')
	    component = component[component['Count'] > min_count]

	    fig = px.scatter(
	        component,
	        y='ComponentName',
	        x='Description',
	        size='Count',
	        hover_name='ComponentName',
	        color='Count',
	        title='Lab Component-ICD Relationship',
	    )
	    fig.update_layout(
	        template="plotly_dark",
	        xaxis_title='ICD Code',
	        yaxis_title='Component Name',
	        coloraxis_colorbar=dict(title='Count'),
	        height=550,
	        width=500,
	    )
	    return fig

	####################################### EDA ##################################################################
	def histplot_6(data):
	    """Overlayed age histogram coloured by ``LegalSex``.

	    Rows missing either ``Age`` or ``LegalSex`` are left out.

	    Returns:
	        A Plotly histogram figure with white title, axis-title and tick fonts.
	    """
	    def white(size):
	        # Font spec shared by the title, axis titles and tick labels.
	        return dict(size=size, color='white')

	    age_sex = data[['Age', 'LegalSex']].dropna(subset=['Age', 'LegalSex'])

	    fig = px.histogram(
	        age_sex,
	        x='Age',
	        color='LegalSex',
	        nbins=10,
	        opacity=0.5,
	        title='Age Distribution by Legal Sex',
	        color_discrete_sequence=px.colors.qualitative.Pastel,
	    )
	    fig.update_layout(
	        title_font=white(20),
	        xaxis_title_font=white(16),
	        yaxis_title_font=white(16),
	        xaxis=dict(tickfont=white(14)),
	        yaxis=dict(tickfont=white(14)),
	    )
	    return fig


	def histplot_7(data):
	    """Line chart of patient counts per age, one line per BP severity level.

	    Rows with a missing ``BP Severity`` and the levels ``'Unknown'`` and
	    ``'BP NORMAL'`` are excluded.

	    Returns:
	        A Plotly figure with one ``lines+markers`` trace per remaining level.
	    """
	    bp = data[['Age', 'BP Severity']].copy()
	    level = bp['BP Severity']
	    bp = bp[level.notna() & (level != 'Unknown') & (level != 'BP NORMAL')]

	    def age_trace(severity):
	        # Count of rows per age for one severity level, ordered by age.
	        per_age = bp.loc[bp['BP Severity'] == severity, 'Age'].value_counts().sort_index()
	        return go.Scatter(x=per_age.index, y=per_age.values, mode='lines+markers', name=severity)

	    fig = go.Figure(data=[age_trace(severity) for severity in bp['BP Severity'].unique()])
	    fig.update_layout(
	        title='Age Distribution by BP Severity',
	        xaxis_title='Age',
	        yaxis_title='Count',
	        title_font=dict(size=20, color='white'),
	    )
	    return fig


	def pie_chart_7(data):
	    """Donut chart of the share of patients per depression severity level.

	    The levels ``'None-minimal'`` and ``'Unknown'`` are excluded before counting.

	    Returns:
	        A Plotly figure containing a single pie trace with a 0.6 hole.
	    """
	    severity = data[['Depression Severity']].copy()
	    column = severity['Depression Severity']
	    severity = severity[(column != 'None-minimal') & (column != 'Unknown')]
	    severity_counts = severity['Depression Severity'].value_counts()

	    slice_colors = ['#FF5733', '#FFC300', '#36A2EB', '#C71585']

	    donut = go.Pie(
	        labels=severity_counts.index,
	        values=severity_counts,
	        hole=0.6,  # hole size that turns the pie into a donut
	        marker=dict(colors=slice_colors),
	        textinfo='label+percent',
	        textfont=dict(size=10),
	        insidetextorientation='radial',
	    )

	    fig = go.Figure(data=[donut])
	    fig.update_layout(
	        title=dict(
	            text="Distribution of Patients by Depression",
	            font=dict(size=20, color='white'),
	        ),
	        autosize=False,
	    )
	    return fig

	def chart_8(data):
	    """Grouped box plot of BMI per BP severity level, one box group per legal sex.

	    Rows missing ``BP Severity``, ``BMI`` or ``LegalSex`` are dropped, as are
	    the severity levels ``'Unknown'`` and ``'BP NORMAL'``.

	    Returns:
	        A Plotly figure with one box trace per distinct ``LegalSex`` value.
	    """
	    bmi = data[['BP Severity', 'BMI', 'LegalSex']].dropna(subset=['BP Severity', 'BMI', 'LegalSex'])
	    bmi = bmi[(bmi['BP Severity'] != 'Unknown') & (bmi['BP Severity'] != 'BP NORMAL')]

	    tick_font = dict(size=14, color='white')
	    fig = go.Figure()

	    for gender in bmi['LegalSex'].unique():
	        rows = bmi[bmi['LegalSex'] == gender]
	        # Blue for 'Male', orange for every other value.
	        box_color = '#1f77b4' if gender == 'Male' else '#ff7f0e'
	        fig.add_trace(
	            go.Box(
	                y=rows['BMI'],
	                x=rows['BP Severity'],
	                name=gender,
	                boxmean='sd',  # draw the mean and standard deviation
	                marker_color=box_color,
	                text=rows['BP Severity'],  # shown in the hover tooltip
	                hoverinfo='y+name+text',
	            )
	        )

	    fig.update_layout(
	        title='BMI by BP Severity and Legal Sex',
	        title_font=dict(size=20, color='white'),
	        xaxis_title='BP Severity',
	        yaxis_title='BMI',
	        xaxis=dict(tickfont=tick_font),
	        yaxis=dict(tickfont=tick_font),
	        boxmode='group',  # boxes of the same severity sit side by side
	        height=600,
	        width=800,
	    )
	    return fig


	def chart_9(data):
	    """Annotated correlation heatmap of the int64/float64 columns of ``data``.

	    ``PatientID`` is left out of the matrix when it is among the selected
	    columns. Undefined correlations (NaN) are shown as 0.

	    Returns:
	        A Plotly figure containing a single heatmap trace.
	    """
	    numeric = data.select_dtypes(include=['int64', 'float64'])
	    # errors='ignore': PatientID may already be absent here (for example when
	    # it is stored with a non-int64/float64 dtype), which used to raise KeyError.
	    numeric = numeric.drop(columns=['PatientID'], errors='ignore')

	    corrmat = numeric.corr().fillna(0)

	    fig = go.Figure(
	        data=go.Heatmap(
	            z=corrmat.values,
	            x=corrmat.columns,
	            y=corrmat.columns,
	            colorscale='RdYlGn',
	            text=corrmat.round(2).values,  # per-cell annotations
	            texttemplate="%{text:.2f}",
	            textfont=dict(size=12, color='black'),
	        )
	    )

	    tick_font = dict(size=14, color='white')
	    fig.update_layout(
	        title='Which Feature is Mainly Involved',
	        title_font=dict(size=20, color='white'),
	        xaxis_title='Features',
	        yaxis_title='Features',
	        xaxis=dict(tickfont=tick_font),
	        yaxis=dict(tickfont=tick_font),
	        height=600,
	        width=800,
	    )
	    return fig

	def chart_10(data):
	    """Split violin plot of age per depression severity, one half per legal sex.

	    The levels ``'None-minimal'`` and ``'Unknown'`` are excluded, ``Age`` is
	    coerced to numeric, and rows missing ``Age``, ``Depression Severity`` or
	    ``LegalSex`` are dropped.

	    Returns:
	        A Plotly figure with one violin trace per distinct ``LegalSex`` value.
	    """
	    level = data['Depression Severity']
	    violin_data = (
	        data[(level != 'None-minimal') & (level != 'Unknown')]
	        .assign(Age=lambda frame: pd.to_numeric(frame['Age'], errors='coerce'))
	        .dropna(subset=['Age', 'Depression Severity', 'LegalSex'])
	    )

	    fig = go.Figure()
	    for sex in violin_data['LegalSex'].unique():
	        rows = violin_data[violin_data['LegalSex'] == sex]
	        is_female = sex == 'Female'
	        fig.add_trace(
	            go.Violin(
	                x=rows['Depression Severity'],
	                y=rows['Age'],
	                legendgroup=sex,
	                scalegroup=sex,
	                name=sex,
	                # 'Female' is drawn on the negative side, everything else on the positive side.
	                side='negative' if is_female else 'positive',
	                line_color='blue' if is_female else 'orange',
	            )
	        )

	    tick_font = dict(size=14, color='white')
	    fig.update_layout(
	        title="Age by Depression Severity and Legal Sex",
	        xaxis_title="Depression Severity",
	        yaxis_title="Age",
	        xaxis=dict(
	            tickmode='array',
	            tickvals=violin_data['Depression Severity'].unique(),
	            tickangle=20,
	            tickfont=tick_font,
	        ),
	        yaxis=dict(range=[0, 80], tickfont=tick_font),
	        violingap=0.2,  # gap between violins
	        violingroupgap=0.3,  # gap between groups
	        violinmode='overlay',  # draw the violins over each other
	        font=dict(color='white', size=14),
	        title_font=dict(size=20, color='white'),
	        paper_bgcolor='rgba(0,0,0,0)',
	        plot_bgcolor='rgba(0,0,0,0)',
	        showlegend=True,
	    )
	    return fig


	def feature_analytics(disease_data):
	    """Split the strongly correlated numeric features into vital signs and the rest.

	    A feature is selected when its correlation with at least one numeric
	    column exceeds 0.7. Because a column's correlation with itself is
	    included, a column with a defined self-correlation selects itself.

	    Args:
	        disease_data: DataFrame; only its numeric columns are correlated.

	    Returns:
	        A tuple ``(vitals, others)`` of lists of column names. ``vitals``
	        holds the selected features among PeakFlow, Temperature,
	        Respiration, Pulse and SPO2. ``others`` holds the remaining selected
	        features, excluding Weight, DiastolicBP, SystolicBP, ComponentValue,
	        Height, Age and BMI. Both lists follow first-seen order while
	        scanning the correlation matrix column by column, so the result is
	        the same on every run (the previous ``set``-based version returned
	        an arbitrary order, which made ``result[0][0]`` unstable).
	    """
	    corr_threshold = 0.7
	    excluded = {'Weight', 'DiastolicBP', 'SystolicBP', 'ComponentValue', 'Height', 'Age', 'BMI'}
	    vital_signs = {'PeakFlow', 'Temperature', 'Respiration', 'Pulse', 'SPO2'}

	    corrmat = disease_data.corr(numeric_only=True)

	    # dict.fromkeys removes duplicates while preserving insertion order.
	    selected = dict.fromkeys(
	        feature
	        for column in corrmat.columns
	        for feature in corrmat.index[corrmat[column] > corr_threshold]
	    )

	    vitals = [name for name in selected if name in vital_signs]
	    others = [name for name in selected if name not in vital_signs and name not in excluded]
	    return vitals, others



	def chart_11(disease_data):
	    """Scatter plot of age against the first vital-sign feature from ``feature_analytics``.

	    Rows missing that feature, ``Age`` or ``LegalSex`` are dropped, points are
	    coloured by ``LegalSex``, and a dashed red vertical line marks the mean of
	    the feature.

	    Returns:
	        A Plotly scatter figure on a black background.
	    """
	    def white(size):
	        # Font spec shared by the title, axis titles and tick labels.
	        return dict(size=size, color='white')

	    vital_features, _ = feature_analytics(disease_data)
	    top_feature = vital_features[0]
	    points = disease_data.dropna(subset=[top_feature, 'Age', 'LegalSex'])

	    fig = px.scatter(
	        points,
	        x=top_feature,
	        y="Age",
	        color="LegalSex",
	        color_discrete_sequence=px.colors.qualitative.Set2,
	        title=f'Age group: {top_feature}',
	        labels={top_feature: top_feature, 'Age': 'Age'},
	        size_max=200,
	    )

	    # Vertical reference line at the mean of the plotted feature.
	    fig.add_vline(x=points[top_feature].mean(), line=dict(color='red', dash='dash'))

	    fig.update_layout(
	        title_font=white(20),
	        xaxis_title_font=white(16),
	        yaxis_title_font=white(16),
	        xaxis=dict(tickangle=20, tickfont=white(14)),
	        yaxis=dict(tickfont=white(14), range=[0, 80]),
	        plot_bgcolor='black',
	        paper_bgcolor='black',
	    )
	    return fig




	def chart_12(filtered_data):
	    """Render a word cloud of the distinct immunization names.

	    The previous version generated the cloud but never drew it: it called
	    ``plt.show()`` without plotting and returned None. The cloud is now
	    drawn on a new Matplotlib figure, which is returned so the caller can
	    display it.

	    Args:
	        filtered_data: DataFrame with an ``ImmunizationName`` column.

	    Returns:
	        The Matplotlib figure holding the word cloud, or None when there is
	        no immunization text to draw.
	    """
	    title = "Immunization Word Cloud"

	    names = filtered_data['ImmunizationName'].dropna()
	    # Sorted so the joined text is the same on every run; the join order of a
	    # plain set is arbitrary.
	    unique_names = sorted({str(name) for name in names if name})
	    text = ' '.join(unique_names).strip(', ').replace(',', '')
	    if not text.strip():
	        # Nothing to draw; return early instead of calling generate() on empty text.
	        return None

	    cloud = WordCloud(
	        scale=3,
	        max_words=150,
	        colormap='RdYlGn',
	        mask=None,
	        background_color='white',
	        stopwords=None,
	        collocations=True,
	        contour_color='black',
	        contour_width=1,
	    ).generate(text)

	    fig, ax = plt.subplots(figsize=(10, 6))
	    ax.imshow(cloud, interpolation='bilinear')
	    ax.axis('off')
	    ax.set_title(title, fontsize=20)
	    return fig



	def mean_of_values(cell_value):
	    """Return the mean of a comma-separated string of numbers.

	    Args:
	        cell_value: A string such as ``"1,2,3"``, a single number, or a
	            missing value.

	    Returns:
	        The arithmetic mean as a float. NaN is returned for a missing value
	        and for a string that contains no numbers (for example ``""``).
	        Empty pieces, such as the one after a trailing comma, are skipped.
	        A non-string value is returned as ``float(cell_value)``.

	    Raises:
	        ValueError: If a non-empty piece cannot be converted to float.
	    """
	    if pd.isna(cell_value):
	        return np.nan
	    if not isinstance(cell_value, str):
	        # A cell holding a single number has no .split(); treat it as one value.
	        return float(cell_value)

	    values = [float(piece) for piece in cell_value.split(',') if piece.strip()]
	    if not values:
	        return np.nan
	    return sum(values) / len(values)

	def _white_font(size):
	    """Return a white Plotly font spec of the given size."""
	    return dict(size=size, color='white')


	def _centered_title(text):
	    """Return a Plotly title spec that centers ``text`` near the top of the figure."""
	    return {'text': text, 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'}


	def _clear_legend():
	    """Return the legend style shared by the cluster charts (white text, transparent box)."""
	    return dict(font=_white_font(16), bgcolor='rgba(0,0,0,0)')


	def _bare_axis(**extra):
	    """Return an axis spec without axis line, grid or zero line, plus any ``extra`` settings."""
	    return dict(showline=False, showgrid=False, zeroline=False, tickfont=_white_font(14), **extra)


	def _ticked_axis():
	    """Return an axis spec with white tick/title fonts and no axis line, grid or tick marks."""
	    return dict(
	        tickfont=dict(color='white', size=14),
	        title_font=dict(color='white', size=16),
	        showline=False,
	        showgrid=False,
	        ticks='',
	    )


	def plots(original_data):
	    """Render the clustering dashboard: charts, per-cluster summaries and density plots.

	    Everything is written straight to the Streamlit page; nothing is returned.

	    Args:
	        original_data: DataFrame with a ``cluster`` column plus the columns
	            read below: ``Age``, ``BMI``, ``SystolicBP``, ``Pulse``,
	            ``Weight``, ``LegalSex``, ``BP Severity``, ``Depression Severity``,
	            ``Temp_condition``, ``weight_condition``, ``SPO2_condition``,
	            ``Pulse_condition``, ``Respiration_condition`` and
	            ``Age_Category``. Columns whose name contains ``'meanvalue'`` are
	            summarised and drawn as density contours.

	    Changes from the previous version:
	        * box colours for ``LegalSex`` cycle through the palette instead of
	          ``pop(0)``, which raised IndexError with more than two values;
	        * the summary iterates over the cluster labels actually present
	          instead of assuming they are exactly ``0..k-1``;
	        * the axis ``titlefont`` attribute is replaced with ``title.font``;
	        * the ``cluster`` grouping key is no longer aggregated as a
	          categorical column.
	    """
	    a = original_data.copy()
	    st.subheader("Clustering Analysis")

	    # ---- 1: number of rows per cluster ------------------------------------
	    col1, col2 = st.columns(2)
	    cluster_counts = a['cluster'].value_counts().reset_index()
	    cluster_counts.columns = ['cluster', 'count']
	    fig_1 = px.bar(
	        cluster_counts,
	        y='cluster',
	        x='count',
	        labels={'cluster': 'Cluster', 'count': 'Count'},
	        text_auto=True,  # print the count on each bar
	        color='cluster',
	        color_continuous_scale='plasma',
	        category_orders={'cluster': [0, 1, 2, 3, 4]},
	    )
	    custom_labels = {0: 'Cluster 0', 1: 'Cluster 1', 2: 'Cluster 2', 3: 'Cluster 3', 4: 'Cluster 4'}
	    fig_1.update_yaxes(tickvals=list(custom_labels), ticktext=list(custom_labels.values()))
	    fig_1.update_layout(
	        title=_centered_title("Count of Data Points per Cluster"),
	        yaxis_title='Cluster',
	        xaxis_title='Count',
	        xaxis=_bare_axis(),
	        yaxis=_bare_axis(),
	        title_font=dict(color='white', size=18),
	        legend=_clear_legend(),
	    )
	    col1.plotly_chart(fig_1, use_container_width=True)

	    # ---- 2: Age vs BMI coloured by cluster --------------------------------
	    fig_2 = px.scatter(
	        a,
	        x='Age',
	        y='BMI',
	        color='cluster',
	        title="Cluster's Profile Based On Age And BMI",
	        color_continuous_scale='plasma',
	    )
	    hidden_axis = dict(showgrid=False, showticklabels=False, zeroline=False)
	    fig_2.update_layout(
	        title=_centered_title("Cluster's Profile Based On Age And BMI"),
	        xaxis=hidden_axis,
	        yaxis=hidden_axis,
	        title_font=dict(color='white', size=18),
	        margin=dict(l=20, r=20, t=40, b=20),  # compact margins
	        legend=_clear_legend(),
	    )
	    fig_2.update_traces(marker=dict(size=12, line=dict(width=2, color='DarkSlateGrey')))
	    col2.plotly_chart(fig_2, use_container_width=True)

	    # ---- 3: cluster distribution per legal sex ----------------------------
	    col3, col4 = st.columns(2)
	    palette = ['#636EFA', '#EF553B']
	    fig_3 = go.Figure()
	    for position, sex in enumerate(a['LegalSex'].dropna().unique()):
	        fig_3.add_trace(
	            go.Box(
	                y=a[a['LegalSex'] == sex]['cluster'],
	                name=f'Legal Sex: {sex}',
	                # Cycle through the palette so a third value cannot exhaust it.
	                marker_color=palette[position % len(palette)],
	                boxmean=True,
	            )
	        )
	    fig_3.update_layout(
	        title=_centered_title("Clusters Distribution by Legal Sex"),
	        title_font=dict(color='white', size=18),
	        xaxis=_bare_axis(),
	        yaxis=_bare_axis(),
	        showlegend=True,
	        legend=_clear_legend(),
	    )
	    col3.plotly_chart(fig_3, use_container_width=True)

	    # ---- 4: cluster distribution per BP severity --------------------------
	    fig_4 = px.violin(
	        a,
	        x="BP Severity",
	        y="cluster",
	        color="BP Severity",
	        color_discrete_sequence=px.colors.qualitative.Vivid,
	        box=True,  # box plot inside each violin
	        points="all",  # show every data point
	        title="Clusters Distribution by BP Severity",
	    )
	    fig_4.update_layout(
	        title=_centered_title("Clusters Distribution by BP Severity"),
	        title_font=dict(color='white', size=18),
	        xaxis_title="BP Severity",
	        yaxis_title="Cluster",
	        xaxis_title_font=_white_font(16),
	        yaxis_title_font=_white_font(16),
	        xaxis=_bare_axis(),
	        yaxis=_bare_axis(),
	        legend=_clear_legend(),
	    )
	    fig_4.update_xaxes(tickangle=45)  # rotate x labels for readability
	    col4.plotly_chart(fig_4, use_container_width=True)

	    # ---- 5: depression severity histogram per cluster ---------------------
	    col5, col6 = st.columns(2)
	    fig_5 = px.histogram(
	        a,
	        x="Depression Severity",
	        color="cluster",
	        color_discrete_sequence=px.colors.diverging.RdYlBu,
	        title='Clusters Distribution by Depression Severity',
	    )
	    fig_5.update_layout(
	        title=_centered_title("Clusters Distribution by Depression Severity"),
	        title_font=dict(color='white', size=18),
	        xaxis_title='Depression Severity',
	        yaxis_title='Count',
	        legend=_clear_legend(),
	        xaxis=_ticked_axis(),
	        yaxis=_ticked_axis(),
	        coloraxis_colorbar=dict(tickfont=dict(color='white')),
	    )
	    col5.plotly_chart(fig_5, use_container_width=True)

	    # ---- 6: cluster distribution per temperature condition ----------------
	    fig_6 = px.violin(
	        a,
	        y="cluster",
	        x="Temp_condition",
	        box=True,
	        points="all",
	        color="Temp_condition",
	        color_discrete_sequence=px.colors.diverging.RdYlBu,
	        title='Clusters Distribution by Temp_condition',
	    )
	    fig_6.update_layout(
	        title=_centered_title("Clusters Distribution by Temp_condition"),
	        title_font=dict(color='white', size=18),
	        xaxis_title='Temp_condition',
	        yaxis_title='Clusters',
	        legend=_clear_legend(),
	        xaxis=_ticked_axis(),
	        yaxis=_ticked_axis(),
	        coloraxis_colorbar=dict(tickfont=dict(color='white')),
	    )
	    col6.plotly_chart(fig_6, use_container_width=True)

	    # ---- 7: stacked bars of cluster sizes per weight condition ------------
	    col7, col8 = st.columns(2)
	    weight_counts = a.groupby(['weight_condition', 'cluster']).size().reset_index(name='count')
	    fig_7 = px.bar(
	        weight_counts,
	        x='weight_condition',
	        y='count',
	        color='cluster',
	        title='Clusters Distribution by Weight Condition',
	        text='count',
	        barmode='stack',
	        color_discrete_sequence=px.colors.diverging.RdYlBu,
	    )
	    fig_7.update_layout(
	        title=_centered_title('Clusters Distribution by Weight Condition'),
	        title_font=dict(color='white', size=18),
	        xaxis=_bare_axis(title='', tickangle=45),  # no axis title, rotated labels
	        yaxis=_bare_axis(title=''),
	        margin=dict(l=20, r=20, t=40, b=20),
	        legend=_clear_legend(),
	    )
	    fig_7.update_traces(
	        texttemplate='%{text:.2s}',
	        textfont_size=14,
	        textposition='inside',
	        marker=dict(line=dict(width=1, color='DarkSlateGrey')),
	    )
	    col7.plotly_chart(fig_7, use_container_width=True)

	    # ---- 8: age per SPO2 condition, coloured by cluster -------------------
	    fig_8 = px.box(
	        a,
	        x='SPO2_condition',
	        y='Age',
	        points='all',
	        title="Clusters Distribution by SPO2_condition",
	        color='cluster',
	        color_discrete_sequence=px.colors.sequential.Plasma_r,
	    )
	    fig_8.update_layout(
	        title=_centered_title("Clusters Distribution by SPO2_condition"),
	        title_font=dict(color='white', size=18),
	        xaxis=_bare_axis(),
	        yaxis=_bare_axis(),
	        margin=dict(l=20, r=20, t=40, b=20),
	        legend=_clear_legend(),
	    )
	    fig_8.update_traces(
	        boxmean=True,  # add the mean line
	        jitter=0.3,  # spread the points horizontally
	        marker=dict(size=10, line=dict(width=2, color='DarkSlateGrey')),
	    )
	    col8.plotly_chart(fig_8, use_container_width=True)

	    # ---- scatter matrix of selected numeric features ----------------------
	    matrix_features = ['Age', 'SystolicBP', 'Pulse', 'Weight', 'BMI']
	    col_11 = st.columns(1)[0]
	    fig_11 = px.scatter_matrix(
	        a[matrix_features + ['cluster']],
	        dimensions=matrix_features,
	        color='cluster',
	        title="Scatter Matrix of Selected Features by Cluster",
	        labels={col: col for col in matrix_features},
	        color_continuous_scale=px.colors.diverging.Spectral,
	    )
	    fig_11.update_traces(diagonal_visible=True)
	    fig_11.update_layout(height=700, width=700, showlegend=True)
	    col_11.plotly_chart(fig_11, use_container_width=True)

	    # ---- per-cluster text summary ------------------------------------------
	    st.subheader("Summary")
	    meanvalue_columns = [col for col in a.columns if 'meanvalue' in col]

	    # observed=True keeps only cluster labels that actually occur in the data.
	    grouped_data = a.groupby('cluster', observed=True)

	    # Mean of every numeric column per cluster.
	    numerical_columns = list(a.select_dtypes(include=['number']).columns)
	    numerical_summary = grouped_data[numerical_columns].mean()

	    # Most frequent value of every categorical column per cluster; the grouping
	    # key itself is left out of the aggregated columns.
	    categorical_columns = list(
	        a.select_dtypes(include=['object', 'category', 'string']).columns.drop('cluster', errors='ignore')
	    )
	    categorical_summary = grouped_data[categorical_columns].agg(lambda x: x.value_counts().index[0])

	    for cluster_id in numerical_summary.index:
	        cluster_traits = {
	            "Age": numerical_summary.loc[cluster_id, 'Age'],
	            "Age_Category": categorical_summary.loc[cluster_id, "Age_Category"],
	            "SystolicBP": numerical_summary.loc[cluster_id, 'SystolicBP'],
	            "Depression Severity": categorical_summary.loc[cluster_id, 'Depression Severity'],
	            "Weight Condition": categorical_summary.loc[cluster_id, 'weight_condition'],
	            "BP Severity": categorical_summary.loc[cluster_id, 'BP Severity'],
	            "Pulse_condition": categorical_summary.loc[cluster_id, 'Pulse_condition'],
	            "Respiration_condition": categorical_summary.loc[cluster_id, 'Respiration_condition'],
	            "SPO2_condition": categorical_summary.loc[cluster_id, 'SPO2_condition'],
	        }

	        summary = f"""
	Cluster - {cluster_id} Traits
	1. Age: Average age is {round(cluster_traits['Age'])} years.
	2. SystolicBP: Patients tend to have slightly elevated systolic blood pressure, averaging {cluster_traits['SystolicBP']} mmHg.
	3. Depression Severity: Predominantly '{cluster_traits['Depression Severity']}'.
	4. "Weight Condition" : {cluster_traits['Weight Condition']}.
	5. "Respiration_condition" : {cluster_traits['Respiration_condition']}.
	6. "Pulse_condition" : {cluster_traits['Pulse_condition']}.
	7. "SPO2_condition" : {cluster_traits['SPO2_condition']}.

	Trait Summary: Cluster {cluster_id} mainly consists of {cluster_traits['Age_Category']} individuals with {cluster_traits['Depression Severity']} depression level, {cluster_traits['BP Severity'].lower()}.
	"""
	        st.write(summary)

	    # NOTE(review): the flattened source does not show whether this table was
	    # written once or once per cluster; it is written once here — confirm.
	    st.write(numerical_summary[meanvalue_columns].round(2))

	    # ---- density contours of each *meanvalue* column against age ----------
	    st.subheader("Density Contour Plot")
	    with st.container():
	        for column in meanvalue_columns:
	            fig = px.density_contour(
	                a,
	                y="Age",
	                x=column,
	                color="cluster",
	                marginal_x="histogram",
	                marginal_y="histogram",
	                template="simple_white",
	                color_discrete_sequence=px.colors.qualitative.Set1,
	            )

	            # NOTE(review): kept from the original, whose comment said this adds
	            # fill to the contours; it only assigns every trace to a bin group
	            # named "fill" — confirm whether contours_coloring was intended.
	            fig.update_traces(bingroup="fill")

	            fig.update_layout(
	                title=f"Joint Density Contour of {column} vs Age by Clusters",
	                # title=dict(text=..., font=...) replaces the deprecated ``titlefont``.
	                xaxis=_bare_axis(
	                    title=dict(text=column, font=_white_font(16)),
	                    tickangle=45,
	                ),
	                yaxis=_bare_axis(title=dict(text='Age', font=_white_font(16))),
	                plot_bgcolor='black',
	                paper_bgcolor='black',
	                title_font_color='white',
	                legend_title="Clusters",
	                width=1500,
	                height=800,
	            )
	            st.plotly_chart(fig, use_container_width=True)


	def ML(filtered_data, scaler, unscaled_data):
	    """Cluster the records with k-prototypes, show a 3-D PCA view, then call ``plots``.

	    Args:
	        filtered_data: DataFrame holding ``PatientID``, ``VisitID`` and the
	            features to cluster. Rows with any missing value are dropped.
	        scaler: Fitted object whose ``inverse_transform`` is applied to the
	            int/float columns of ``filtered_data`` — presumably the
	            StandardScaler that scaled them; verify against the caller.
	        unscaled_data: Not used; kept so existing callers keep working.

	    Returns:
	        The return value of ``plots`` (None), or None straight away when no
	        complete row is left to cluster.

	    Changes from the previous version:
	        * an empty frame after ``dropna`` shows a warning instead of crashing;
	        * exactly 5 rows now gives 3 clusters (it fell through to 4);
	        * the number of PCA components is capped by the number of rows and
	          numeric features, and the 3-D chart is skipped when fewer than 3
	          components are available;
	        * the PCA axes are labelled PC1..PC3 rather than with the names of
	          the first three original features.
	    """
	    man = filtered_data.dropna().drop(columns=['PatientID', 'VisitID'])
	    if man.empty:
	        st.warning("No complete records are available for clustering after removing missing values.")
	        return None

	    numerical_columns = list(man.select_dtypes(include=['int', 'float']).columns)
	    categorial_columns = list(man.select_dtypes(exclude=['int', 'float', 'datetime']).columns)
	    # KPrototypes needs the positions of the categorical columns.
	    categorical_indexes = [man.columns.get_loc(column) for column in categorial_columns]

	    # Fewer clusters for very small samples.
	    n_rows = man.shape[0]
	    if n_rows <= 4:
	        n_clusters = 1
	    elif n_rows < 10:
	        n_clusters = 3
	    else:
	        n_clusters = 4

	    kproto = KPrototypes(n_clusters=n_clusters, init='Huang', n_init=25, random_state=42)
	    kproto.fit_predict(man, categorical=categorical_indexes)
	    cluster_labels = kproto.labels_

	    # Undo the scaling so the charts show the numeric values in original units.
	    original_numeric_data = scaler.inverse_transform(man[numerical_columns])
	    original_data = pd.DataFrame(original_numeric_data, columns=numerical_columns)
	    original_data["cluster"] = cluster_labels
	    original_data["cluster"] = original_data["cluster"].astype('category')

	    # ---- PCA view -----------------------------------------------------------
	    st.subheader("PCA")
	    features = original_data[numerical_columns]
	    # PCA cannot produce more components than there are rows or features.
	    n_components = min(4, features.shape[0], features.shape[1])
	    if n_components >= 3:
	        components = PCA(n_components=n_components).fit_transform(features)
	        fig_9 = go.Figure(
	            go.Scatter3d(
	                mode='markers',
	                x=components[:, 0],
	                y=components[:, 1],
	                z=components[:, 2],
	                marker=dict(size=4, color=cluster_labels, colorscale='spectral'),
	            )
	        )
	        fig_9.update_layout(
	            scene=dict(
	                xaxis=dict(title='PC1', color='white'),
	                yaxis=dict(title='PC2', color='white'),
	                zaxis=dict(title='PC3', color='white'),
	            ),
	        )
	        col9 = st.columns(1)[0]
	        col9.plotly_chart(fig_9, use_container_width=True)
	    else:
	        st.info("Not enough rows or numeric features for a 3-D PCA projection.")

	    # Put the categorical columns back next to the unscaled numeric ones.
	    categorical_part = man[categorial_columns].reset_index(drop=True)
	    original_data = pd.concat([original_data.reset_index(drop=True), categorical_part], axis=1)

	    return plots(original_data)



	def imputer(filtered_data):
	    """Impute and standardise the numeric features, then run ``ML``.

	    The numeric columns of ``filtered_data``, except the first two, are
	    filled with ``IterativeImputer``, rounded to two decimals and merged back
	    into the frame. The same columns are then standardised with a
	    ``StandardScaler`` and the result is passed to ``ML`` together with the
	    fitted scaler and the imputed-but-unscaled copy.

	    Args:
	        filtered_data: DataFrame whose first two numeric columns are left
	            untouched (presumably ``PatientID`` and ``VisitID`` - confirm
	            against the caller's column order).

	    Returns:
	        Whatever ``ML`` returns.
	    """
	    numeric_columns = filtered_data.select_dtypes(include=['int', 'float'])
	    numeric_columns = numeric_columns.iloc[:, 2:].copy()

	    # Setting the random_state argument for reproducibility
	    # NOTE(review): IterativeImputer may drop columns with no observed values,
	    # which would break the column labels below; the visible caller removes
	    # all-NaN columns before calling this function.
	    iterative_imputer = IterativeImputer(random_state=42)
	    imputed = iterative_imputer.fit_transform(numeric_columns)
	    # Keep the caller's index so the axis=1 concat below aligns row by row
	    # even when ``filtered_data`` does not have a default 0..n-1 index.
	    Imputed_data = pd.DataFrame(
	        imputed, columns=numeric_columns.columns, index=numeric_columns.index
	    ).round(2)
	    filtered_data = filtered_data.drop(columns=Imputed_data.columns)
	    Ml_data = pd.concat([filtered_data, Imputed_data], axis=1)
	    unscaled_data = Ml_data.copy()

	    ## Scaling
	    scaled_data = Ml_data.select_dtypes(include=['int', 'float'])
	    scaled_data = scaled_data.iloc[:, 2:].copy()
	    scaler = StandardScaler()
	    scaled_data = pd.DataFrame(
	        scaler.fit_transform(scaled_data),
	        columns=scaled_data.columns,
	        index=scaled_data.index,
	    )
	    Ml_data = Ml_data.drop(columns=scaled_data.columns)
	    Ml_data = pd.concat([Ml_data, scaled_data], axis=1)
	    # Swap in an outlier-removed frame here if outliers should be excluded.
	    Ml_data = Ml_data.convert_dtypes()
	    return ML(Ml_data, scaler, unscaled_data)


	# Parquet file that backs the Machine Learning page.
	filename_1 = "ML_DATA.parquet"

	# The access token is read from the environment; a missing variable raises
	# KeyError at import time.
	token = os.environ["HUGGING_FACE_HUB_TOKEN"]

	# Download the dataset file and keep the path returned by the hub client.
	local_file_1 = hf_hub_download(
	    repo_id=repo_id,
	    filename=filename_1,
	    repo_type="dataset",
	    token=token,
	)

	@st.cache_data()
	def fetch_data_1():
	    """Read the downloaded ML parquet file into a DataFrame.

	    Wrapped in ``st.cache_data`` so Streamlit can reuse the result.
	    """
	    return pd.read_parquet(local_file_1)



	if analysis_option == 'Machine Learning':
	    # Machine Learning page: pick a disease, pivot its key lab results into
	    # columns, reduce each lab to a mean value and hand the frame to
	    # ``imputer`` (which imputes, scales and triggers clustering).
	    data = fetch_data_1()
	    problem = list(data['Description'].unique())
	    st.subheader("_Select Disease_:sunglasses:")
	    health_option = st.selectbox("_Select Disease_:sunglasses:", ['', *problem], label_visibility="collapsed")
	    filtered_data = data[data['Description'] == health_option].copy()

	    # Use the first non-null key-lab list: the guard only requires that some
	    # row has one, so the first row itself may be null.
	    key_lab_lists = filtered_data['key_lab2'].dropna()
	    if not key_lab_lists.empty:
	        key_labs = list(key_lab_lists.iloc[0])
	        id_columns = ['PatientID', 'VisitID', 'GroupedICD']

	        # One row per patient/visit/ICD with each lab component as a column;
	        # repeated results are joined into a comma-separated string.
	        pivot_data = pd.pivot_table(
	            filtered_data,
	            values='ComponentValue',
	            index=id_columns,
	            columns='ComponentName',
	            aggfunc=lambda x: ', '.join(map(str, x)),
	        )
	        pivot_data = pivot_data.reset_index(drop=False)
	        pivot_data = pivot_data[id_columns + key_labs].copy()
	        filtered_data = pd.merge(filtered_data, pivot_data, on=id_columns, how='left')

	        # NOTE(review): the window of 20 columns is hard-coded and unrelated
	        # to the number of key labs - confirm this is still intended.
	        filtered_data.iloc[:, -20:] = filtered_data.iloc[:, -20:].convert_dtypes()

	        # The merged lab columns are the trailing columns of the frame; walk
	        # them from the last one backwards and derive a mean-value column.
	        hmm = pd.DataFrame()
	        num_columns = len(key_labs)
	        for i in range(1, num_columns + 1):
	            existing_column = filtered_data.columns[-i]
	            new_column_name = f'{existing_column}_meanvalue'
	            hmm[new_column_name] = filtered_data[existing_column].apply(mean_of_values)
	        filtered_data = pd.concat([filtered_data, hmm], axis=1)

	        column_list = [
	            ## Necessary columns
	            'PatientID', 'VisitID', 'GroupedICD',

	            ## Numerical values
	            'Age', 'SystolicBP',
	            'DiastolicBP', 'Temperature',
	            'Pulse', 'Weight', 'Height', 'BMI', 'Respiration',
	            'SPO2', 'PHQ_9Score',

	            ## Categorical values
	            'LegalSex', 'BPLocation', 'BPPosition', 'PregnancyStatus', 'LactationStatus', 'TemperatureSource',
	            'Age_Category', 'BP Severity', 'Depression Severity', 'weight_condition', 'Temp_condition', 'Pulse_condition',
	            'Respiration_condition', 'SPO2_condition', 'PeakF_condition',
	        ]
	        last = list(hmm.columns)
	        required_columns = column_list + last
	        filtered_data = filtered_data[required_columns].copy()
	        filtered_data = filtered_data.drop_duplicates().reset_index(drop=True)
	        # Columns with no values at all cannot be imputed, so drop them first.
	        filtered_data = filtered_data.dropna(axis=1, how='all')
	        imputer(filtered_data)































































	if analysis_option == 'Data':
	    # Data page: age filter, patient counts by sex, overview charts and a
	    # preview of the filtered rows.
	    age_min = int(data['Age'].min())
	    age_max = int(data['Age'].max())
	    age_range = st.sidebar.slider('Select Age Range', age_min, age_max, (age_min, age_max))
	    data = data[(data['Age'] >= age_range[0]) & (data['Age'] <= age_range[1])].copy()

	    Sex = data.groupby('LegalSex')['PatientID'].nunique().reset_index(name='count')
	    st.subheader("Distribution of Patient's by Sex", divider='rainbow')
	    col1, col2, col3 = st.columns(3)
	    # Look each sex up by value and fall back to 0 when it is absent, instead
	    # of relying on fixed row labels that depend on which categories exist.
	    for metric_col, sex_label in ((col1, 'Male'), (col2, 'Female')):
	        matches = Sex.loc[Sex['LegalSex'] == sex_label, 'count']
	        metric_col.metric(label=sex_label, value=matches.iloc[0] if not matches.empty else 0)

	    col4, col5 = st.columns(2)
	    fig2 = funnel_chart(data)
	    col4.plotly_chart(fig2, use_container_width=True)
	    fig = scatterplot(data)
	    col5.plotly_chart(fig, use_container_width=True)
	    col6 = st.columns(1)[0]
	    fig_man = scatter_man(data)
	    col6.plotly_chart(fig_man, use_container_width=True)

	    st.dataframe(data.head(20).style.format({'PatientID': "{:.0f}"}))

	if analysis_option == 'EDA':
	    # EDA page: age filter, disease picker, patient counts by sex and a grid
	    # of charts for the selected disease.
	    age_min = int(data['Age'].min())
	    age_max = int(data['Age'].max())
	    age_range = st.sidebar.slider('Select Age Range', age_min, age_max, (age_min, age_max))
	    data = data[(data['Age'] >= age_range[0]) & (data['Age'] <= age_range[1])].copy()

	    problem = list(data['Description'].unique())
	    st.subheader("_Select Disease_:sunglasses:")
	    health_option = st.selectbox("_Select Disease_:sunglasses:", ['', *problem], label_visibility="collapsed")
	    # Nothing is rendered until a real disease (not the blank entry) is chosen.
	    if health_option in problem:
	        health_data = data[data['Description'] == health_option].copy()
	        Sex = health_data.groupby('LegalSex')['PatientID'].nunique().reset_index(name='count')
	        st.subheader(f"Patients for '{health_option}' by Sex", divider='rainbow')
	        col1, col2, col3 = st.columns(3)
	        # Show 0 for a sex with no patients; each metric keeps its own label
	        # (the Female fallback used to be mislabelled "Male").
	        for metric_col, sex_label in ((col1, 'Male'), (col2, 'Female')):
	            if sex_label in Sex['LegalSex'].values:
	                metric_col.metric(label=sex_label, value=Sex[Sex['LegalSex'] == sex_label]['count'].iloc[0])
	            else:
	                metric_col.metric(label=sex_label, value=0)

	        # Charts are laid out two per row; the last row has a single chart.
	        chart_rows = (
	            (funnel_chart, barplot_lab),
	            (histplot_6, histplot_7),
	            (pie_chart_7, chart_8),
	            (chart_9, chart_10),
	            (chart_11, None),
	        )
	        for build_left, build_right in chart_rows:
	            left_col, right_col = st.columns(2)
	            left_col.plotly_chart(build_left(health_data), use_container_width=True)
	            if build_right is not None:
	                right_col.plotly_chart(build_right(health_data), use_container_width=True)

	        st.dataframe(health_data.head(20).style.format({'PatientID': "{:.0f}"}))





	# Initialize Google Gemini or any other Google API client using the key


	if analysis_option == 'Health Care Chat Bot AI':
	    # Chat page: natural-language questions over ``data`` through PandasAI
	    # backed by Google Gemini.
	    # TODO: add patient + vital information to the dataset used here.
	    google_api_key = os.environ.get("google_key")
	    llm = GoogleGemini(api_key=google_api_key)
	    # Two handles on the same data: one whose parser renders dataframes and
	    # plots straight into Streamlit, and one returning plain values.
	    pandas_ai = SmartDataframe(data, config={"llm": llm, "response_parser": StreamlitResponse, "verbose": True})
	    pandas_ai_2 = SmartDataframe(data, config={"llm": llm, "verbose": True})

	    # Streamlit app title and description
	    st.title("AI-Powered Data Analysis App")
	    st.write("This application allows you to interact with your dataset using natural language prompts. Just ask a question, and the AI will provide insights based on your data.")

	    # Display the dataset
	    st.subheader("Dataset Preview")
	    st.dataframe(data.head())

	    # User input for natural language prompt
	    prompt = st.text_input("Enter your prompt:", placeholder="e.g., What are the top diagnoses?")

	    # Process the input and display the result
	    if st.button("Submit"):
	        # Match the chart keywords case-insensitively so "Plot" or "GRAPH"
	        # are routed to the plotting parser as well.
	        lowered_prompt = prompt.lower()
	        wants_chart = 'plot' in lowered_prompt or 'graph' in lowered_prompt
	        if wants_chart:
	            try:
	                # StreamlitResponse renders the output itself during chat().
	                result = pandas_ai.chat(prompt)
	                st.subheader("Result")
	            except KeyError as e:
	                st.error(f"Error: {e}. Unable to retrieve result.")
	        elif prompt:
	            try:
	                result = pandas_ai_2.chat(prompt)
	                st.subheader("Result")
	                st.write(result)
	            except KeyError as e:
	                st.error(f"Error: {e}. Unable to retrieve result.")
	        else:
	            st.warning("Please enter a prompt.")

	    # Add a footer
	    st.write("Powered by PandasAI and Google Gemini.")