# agexp / app.py
# syedislamuddin's picture
# Upload 17 files
# 400a76d
#All Plots in THIS APP ARE BASED ON pegasus (As opposed to scanpy in previous versions)
import os
import numpy as np
import streamlit as st
#from st_aggrid import AgGrid, GridOptionsBuilder,GridUpdateMode,DataReturnMode
#import scanpy as sc
import pegasus as pg
from pandas import read_csv, pivot
import seaborn as sns
from seaborn import clustermap
import matplotlib.pyplot as plt
from matplotlib.pyplot import rc_context
from matplotlib import rcParams
#import matplotlib as mpl
#import pandas as pd
#import matplotlib.font_manager as fm
import matplotlib.pyplot as plt
plt.rcParams.update({'figure.autolayout': True})
plt.rcParams['axes.linewidth'] = 0.001
#from functions import pathway_analyses
#sc.settings.set_figure_params(dpi=80, facecolor='white',fontsize=12)
@st.cache_resource
def _load_annotated_data(path):
    """Read the annotated dataset at *path* with pegasus.

    Cached as a resource so the file is read from disk once and the same
    object is handed to every session instead of being re-read per rerun.
    """
    return pg.read_input(path)


@st.cache_data
def _load_csv(path, index_col=None):
    """Read the CSV file at *path* into a DataFrame (cached per argument set)."""
    return read_csv(path, index_col=index_col)


def get_data():
    """Make sure ``st.session_state`` holds every dataset the app reads.

    Keys populated (each only when it is missing):

    * ``adata_annot``  - annotated dataset loaded from
      ``cwd + 'new_multiregion_brainaging_annotated.h5ad'``
    * ``genes_list``   - sorted unique gene names (``adata_annot.var.index``)
    * ``cell_type``    - sorted unique values of ``adata_annot.obs.new_anno``
    * ``go_table``     - list of pathway database names
    * ``all_pwaydata`` - dict mapping each database name to its DataFrame
    * ``cluster_table``- age-association table used for the cluster maps

    This function is deliberately NOT wrapped in a Streamlit cache decorator:
    its whole purpose is the side effect on ``st.session_state``, and a cached
    call skips the body, which left new sessions without any of the keys
    above.  The expensive reads are cached in the private loaders instead.

    The earlier guard also tested a ``'broad_type'`` key that nothing ever
    sets (so it was always true), and the ``cluster_table`` branch relied on
    a ``pd`` name that was only imported inside the pathway branch.
    """
    state = st.session_state

    if 'adata_annot' not in state:
        # ``cwd`` is a module-level global assigned before get_data() is called.
        state['adata_annot'] = _load_annotated_data(
            cwd + 'new_multiregion_brainaging_annotated.h5ad')
    adata_annot = state['adata_annot']

    if 'genes_list' not in state:
        state['genes_list'] = sorted(adata_annot.var.index.unique())

    if 'cell_type' not in state:
        state['cell_type'] = sorted(adata_annot.obs.new_anno.unique())

    # Pathway (GO term) databases: one CSV per database name.
    if 'go_table' not in state or 'all_pwaydata' not in state:
        go_table = [
            'Aging_Perturbations_from_GEO_DOWN',
            'Aging_Perturbations_from_GEO_UP',
            'Disease_Perturbations_from_GEO_down',
            'Disease_Perturbations_from_GEO_up',
            'GO_Biological_Process_2021',
            'GO_Cellular_Component_2021',
            'GO_Molecular_Function_2021',
            'KEGG 2021 Human',
            'Wiki 2021 Human',
        ]
        state['all_pwaydata'] = {
            name: _load_csv("megan_pathways/" + name + ".csv")
            for name in go_table
        }
        state['go_table'] = go_table

    # Table behind the cluster maps.
    if 'cluster_table' not in state:
        state['cluster_table'] = _load_csv(
            'aging.glmmtmb_age_diffs_fdr.csv', index_col=0)
#done load Data
# Use the wide page layout.
st.set_page_config(layout="wide")

# CSS: larger font for expander headers.
_EXPANDER_HEADER_STYLE = """
<style>
.streamlit-expanderHeader {
font-size: x-large;
}
</style>
"""
st.markdown(_EXPANDER_HEADER_STYLE, unsafe_allow_html=True)

# CSS: larger font for the page title.
_TITLE_STYLE = """
<style>
div.stTitle {
font-size:40px;
}
</style>"""
m = st.markdown(_TITLE_STYLE, unsafe_allow_html=True)

# Disable the st.pyplot global-use deprecation warning.
st.set_option('deprecation.showPyplotGlobalUse', False)

# Directory prefix (with trailing slash) used when loading the annotated data.
cwd = f"{os.getcwd()}/"  # + 'data/'
def convert_df(df):
    """Return *df* rendered as CSV text and encoded to UTF-8 bytes."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
def disp_table(data_table):
    """Show a CSV download button for *data_table*.

    Nothing is rendered when the table has no rows.  The download is always
    named ``download_gene_list.csv``.
    """
    row_count = data_table.shape[0]
    if row_count <= 0:
        return

    payload = convert_df(data_table)
    st.download_button(
        label="Download Table as CSV file",
        data=payload,
        file_name='download_gene_list.csv',
        mime='text/csv',
    )
# Fill st.session_state with the datasets before any widget reads from it.
get_data()
st.title('Brain Age Browser')

# Red banner pointing users at the Reset button.  The style attribute is
# quoted on both sides (it previously had only a stray closing quote).
txt = "In the event of APP CRASH, Please Press Reset Button below"
st.markdown(
    f'<p style="color:red;font-size:24px;border-radius:2%;">{txt}</p>',
    unsafe_allow_html=True,
)

# CSS: colours of st.button in its normal and hover states.
m = st.markdown("""
<style>
div.stButton > button:first-child {
background-color: #0099ff;
color:#ffffff;
}
div.stButton > button:hover {
background-color: #00ff00;
color:#ff0000;
}
</style>""", unsafe_allow_html=True)

# (old experiment) b = st.button("Click me to start running the program")
# Reset drops everything held by st.cache_data.
clear = st.button('Reset')
if clear:
    st.cache_data.clear()

# CSS: lay the radio options out horizontally and centred.
st.write('<style>div.row-widget.stRadio > div{flex-direction:row;justify-content: center;} </style>', unsafe_allow_html=True)

# View selector; the chosen label drives the if/elif chain that follows.
opt_selected = st.radio(
    "**Please select an option**",
    ("Gene Expression by CellType", "Age associations for Multiple genes", "Age associations with GoTerms", "README"))
if opt_selected == 'Gene Expression by CellType':
    # View 1: UMAPs for one gene, overall and within one cell type, split by
    # age group.  Both pickers sit in a single form so nothing is plotted
    # until the user presses "Go".
    with st.form(key='columns_in_form'):
        c1, c2 = st.columns(2)
        with c1:
            selected_gene = st.selectbox(
                'Please select a gene',
                st.session_state['genes_list'])
        with c2:
            selected_celltype = st.selectbox(
                'Please select CellType',
                st.session_state['cell_type']
            )
        Updated = st.form_submit_button(label='Go')

    if selected_gene is not None and selected_celltype is not None and Updated:
        adata_all = st.session_state['adata_annot']

        # ---- Row 1: annotation UMAP next to the selected gene's UMAP ----
        tita = "155,192 annotated nuclei from four brain regions"
        html_stra = f"""
<style>
p.a {{
font: bold {16}px Courier;
text-align: center;
}}
</style>
<p class="a">{tita}</p>
"""
        st.markdown(html_stra, unsafe_allow_html=True)
        cc1, cc2 = st.columns([1, 1])
        with cc1:
            dot10 = pg.scatter(adata_all, attrs=['new_anno'], basis='umap', wspace=.02, legend_loc='on data', legend_fontsize=7, return_fig=True)
            umap_ax = dot10.get_figure().gca()
            umap_ax.set_title("")
            umap_ax.axis('off')
            # With the axes hidden, draw two short arrows from the lower-left
            # corner of the view to indicate the UMAP1 / UMAP2 directions.
            xmin, xmax = umap_ax.get_xaxis().get_view_interval()
            ymin, ymax = umap_ax.get_yaxis().get_view_interval()
            umap_ax.arrow(xmin, ymin, xmax/4, 0, head_width=0.2, head_length=0.3, linewidth=.5, color='k', length_includes_head=True)
            umap_ax.text(x=.1*xmin, y=ymin, s="UMAP1", rotation=0, fontsize=5, color='k')
            umap_ax.arrow(xmin, ymin, 0, ymax/4, head_width=0.2, head_length=0.3, linewidth=.5, color='k', length_includes_head=True)
            umap_ax.text(x=1.1*xmin, y=.1*ymin, s="UMAP2", rotation=0, fontsize=5, color='k')
            st.pyplot(dot10)
            # Release the figure once rendered so reruns do not pile up figures.
            plt.close(dot10)
        with cc2:
            dot11 = pg.scatter(adata_all, attrs=selected_gene, basis='umap', wspace=.02, legend_loc='on data', legend_fontsize=7, return_fig=True)
            dot11.get_figure().gca().set_title("")
            st.pyplot(dot11)
            plt.close(dot11)

        # ---- Row 2: expression split by age group ----
        # (The unused young/old subsets of the full dataset that used to be
        # built here were dropped; they were never read.)
        tit = "Expression of "+selected_gene+" across old and young age groups"
        html_str = f"""
<style>
p.a {{
font: bold {16}px Courier;
text-align: center;
}}
</style>
<p class="a">{tit}</p>
"""
        st.markdown(html_str, unsafe_allow_html=True)
        cc1, cc2 = st.columns([1, 1])
        with cc1:
            # All nuclei, one panel per age group.
            dot12 = pg.scatter_groups(adata_all, attr=selected_gene, basis='umap', groupby='Age_group', cmap='BuPu', vmin=0, vmax=6, show_full=False, nrows=2, return_fig=True)
            allaxes = dot12.get_figure().get_axes()
            allaxes[0].set_title("All", fontsize=20)
            allaxes[0].set_ylabel("old", fontsize=20)
            allaxes[0].set_xlabel("")
            allaxes[1].set_title("")
            allaxes[1].set_ylabel("young", fontsize=20)
            allaxes[1].set_xlabel("")
            # Drop the third and fourth axes from this figure
            # (presumably the colour bars - the right-hand figure keeps its own).
            allaxes[2].remove()
            allaxes[3].remove()
            st.pyplot(dot12)
            plt.close(dot12)
        with cc2:
            # Same plot restricted to the selected cell type.
            adata_Ast = adata_all[adata_all.obs['new_anno'] == selected_celltype]
            dot13 = pg.scatter_groups(adata_Ast, attr=selected_gene, basis='umap', groupby='Age_group', cmap='BuPu', vmin=0, vmax=6, show_full=False, nrows=2, return_fig=True, legend_loc='right margin')
            allaxes1 = dot13.get_figure().get_axes()
            allaxes1[0].set_title(selected_celltype, fontsize=20)
            allaxes1[1].set_title("")
            for panel_ax in allaxes1[:2]:
                panel_ax.set_xlabel("")
                panel_ax.set_ylabel("")
            # Shift the third and fourth axes to the right by 0.2 figure units.
            for side_ax in allaxes1[2:4]:
                box = side_ax.get_position()
                side_ax.set_position([box.x0+.2, box.y0, box.width, box.height])
            st.pyplot(dot13)
            plt.close(dot13)
#with tab2:
elif opt_selected == 'Age associations for Multiple genes':
#set plot theme
blupink = sns.palplot(sns.diverging_palette(h_neg=234,h_pos=342,n=9,s=75,l=30,sep=10,center='light'))
sns.set_context("paper", font_scale=1)
cmap = sns.diverging_palette(h_neg=234,h_pos=342,n=9,s=75,l=30,sep=7,center='light', as_cmap=True)
with st.form(key='multi_genes_form'):
c1, c2= st.columns([9.9,.1])
with c1:
multi_genes = st.multiselect(
'Select Genes List',
st.session_state['genes_list'])
Updated_tab2=st.form_submit_button(label = 'Show GeneSet Results')
if not isinstance(multi_genes, type(None)) and Updated_tab2:
multi_genes = np.sort(multi_genes)
#####NEW CODE
tit="expression per cell type"
#st.write(tit)
html_str1 = f"""
<style>
p.a {{
font: bold {16}px Courier;
text-align: center;
}}
</style>
<p class="a">{tit}</p>
"""
cc1,cc2=st.columns([1,1])
with cc1:
try:
dot21=pg.dotplot(st.session_state['adata_annot'], genes=multi_genes, groupby='new_anno',switch_axes=True,return_fig=True,cmap='BuPu')
dot21.set_figheight(len(multi_genes)*.5)
allaxes21 = dot21.get_figure().get_axes()
#allaxes21[0].set_title("expression per cell type")
allaxes21[0].set_xlabel("")
st.markdown(html_str1, unsafe_allow_html=True)
st.pyplot(dot21)
except:
st.write("**An exception has occurred, Please check the GeneSet**")
with cc2:
# now cluster_map
celltype_DAGS = st.session_state['cluster_table'][st.session_state['cluster_table'].eval("type.str.endswith('cell_type').values")]
celltype_DAGS = celltype_DAGS.pivot(index='feature', columns='tissue')['estimate']
#replace NaN with zeroes
celltype_DAGS.fillna(0, inplace=True)
#query which cell type DAGs are in ad gene set
celltype_ad_genes = celltype_DAGS.loc[celltype_DAGS.index.isin(multi_genes)]
lst=list(celltype_ad_genes.index)
tit="age association per cell type"
#st.write(tit)
html_str1 = f"""
<style>
p.a {{
font: bold {16}px Courier;
text-align: center;
}}
</style>
<p class="a">{tit}</p>
"""
st.markdown(html_str1, unsafe_allow_html=True)
if celltype_ad_genes.shape[0] > 1:
kws = dict(cbar_kws=dict(label='coefficient estimates', orientation='vertical',shrink='.05'))
with sns.axes_style({"axes.edgecolor": "black"}):
if len(lst)>50:
g = sns.clustermap(celltype_ad_genes, figsize=(6,len(lst)*.2), linewidth=0.05, cmap=cmap, col_cluster=True, dendrogram_ratio=0.1, row_cluster=True, vmin=-1, vmax=1, center= 0, linecolor= 'white', clip_on=False,
xticklabels=True, yticklabels=True, square=False,cbar_kws={"orientation": "vertical","label": "coefficient estimates"})
x0, _y0, _w, _h = g.cbar_pos
g.ax_cbar.set_position([x0+.95, 0.75, .02, len(lst)*.0005])
else:
g = sns.clustermap(celltype_ad_genes, figsize=(6,6), linewidth=0.05, cmap=cmap, col_cluster=True, dendrogram_ratio=0.1, row_cluster=True, vmin=-1, vmax=1, center= 0, linecolor= 'white', clip_on=False,
xticklabels=True, yticklabels=True, square=False,cbar_kws={"orientation": "vertical","label": "coefficient estimates"})
x0, _y0, _w, _h = g.cbar_pos
g.ax_cbar.set_position([x0+.95, 0.6, .02, .25])
#g.ax_cbar.set_title('coefficient estimates')
g.ax_cbar.tick_params(axis='x', length=10)
ax = g.ax_heatmap
#ax.title("age association per cell type")
ax.set_ylabel("")
ax.set_xlabel("")
#ax.set_title("age association per cell type")
st.pyplot(g)
disp_table(celltype_ad_genes)
st.dataframe(celltype_ad_genes)
#
else:
st.write('**Got Empty Data Set (from aging.glmmtmb_age_diffs_fdr.csv), Please select a different set of genes**')
######END NEW CODE
#with tab3:
elif opt_selected == 'Age associations with GoTerms':
#set plot theme
blupink = sns.palplot(sns.diverging_palette(h_neg=234,h_pos=342,n=9,s=75,l=30,sep=10,center='light'))
sns.set_context("paper", font_scale=1)
cmap = sns.diverging_palette(h_neg=234,h_pos=342,n=9,s=75,l=30,sep=7,center='light', as_cmap=True)
with st.form(key='GoDatabase_form'):
selected_go_database = st.selectbox(
'Please select Pathway Database',st.session_state['go_table'])
selected_pway_data=st.session_state['all_pwaydata'][selected_go_database]
change = st.form_submit_button("Updata")
c1, c2 = st.columns([9.999,.001])
with c1:
selected_pway_data=st.session_state['all_pwaydata'][selected_go_database]
#st.write(selected_go_database)
go_term = st.selectbox(
'Select GO Term',
list(selected_pway_data.columns))
Updated_tab3=st.form_submit_button(label = 'Show Pathway Results')
#if not isinstance(multi_genes, type(None)) and Updated_tab3:# and Updated_tab4:
if not isinstance(go_term, type(None)) and Updated_tab3:# and Updated_tab4:
multi_genes = [x for x in selected_pway_data[go_term] if str(x) != 'nan']
multi_genes = list(set(multi_genes))
multi_genes=np.sort(multi_genes)
#####NEW CODE
cc1,cc2=st.columns([1,1])
with cc1:
st.markdown('**Pathway Database:** '+selected_go_database) #+'\n**Pathway Selected:** '+go_term)
st.markdown('**Pathway Selected:** '+go_term)
fig = plt.figure(figsize=(1, 1))
try:
dot21=pg.dotplot(st.session_state['adata_annot'], genes=multi_genes, groupby='new_anno',switch_axes=True,return_fig=True,cmap='BuPu')#, dpi=300.0)
dot21.set_figheight(len(multi_genes)*.3)
allaxes21 = dot21.get_figure().get_axes()
allaxes21[0].set_title("expression per cell type")
allaxes21[0].set_xlabel("")
st.pyplot(dot21)
except:
st.write("**An exception has occurred, Please check the GeneSet**")
with cc2:
# now cluster_map
celltype_DAGS = st.session_state['cluster_table'][st.session_state['cluster_table'].eval("type.str.endswith('cell_type').values")]
celltype_DAGS = celltype_DAGS.pivot(index='feature', columns='tissue')['estimate']
#replace NaN with zeroes
celltype_DAGS.fillna(0, inplace=True)
#query which cell type DAGs are in ad gene set
celltype_ad_genes = celltype_DAGS.loc[celltype_DAGS.index.isin(multi_genes)]
st.markdown('Please note that **number of Genes in Pathway are:** '+str(len(multi_genes))) #+' \nAnd Genes **in aging.glmmtmb_age_diffs_fdr.csv file are:** '+str(len(celltype_ad_genes)))
st.markdown('Genes **in aging.glmmtmb_age_diffs_fdr.csv file are:** '+str(len(celltype_ad_genes)))
#st.write('**Common List Is:** ')
tit="age association per cell type"
#st.write(tit)
html_str1 = f"""
<style>
p.a {{
font: bold {16}px Courier;
text-align: center;
}}
</style>
<p class="a">{tit}</p>
"""
lst=list(celltype_ad_genes.index)
if celltype_ad_genes.shape[0] > 1:
st.markdown(html_str1, unsafe_allow_html=True)
kws = dict(cbar_kws=dict(label='coefficient estimates', orientation='vertical',shrink='.05'))
with sns.axes_style({"axes.edgecolor": "black"}):
if len(lst)>50:
g = sns.clustermap(celltype_ad_genes, figsize=(6,len(lst)*.2), linewidth=0.05, cmap=cmap, col_cluster=True, dendrogram_ratio=0.1, row_cluster=True, vmin=-1, vmax=1, center= 0, linecolor= 'white', clip_on=False,
xticklabels=True, yticklabels=True, square=False,cbar_kws={"orientation": "vertical","label": "coefficient estimates"})
x0, _y0, _w, _h = g.cbar_pos
#g.ax_cbar.set_position([x0+.9, 0.3, g.ax_row_dendrogram.get_position().width, 0.5])
g.ax_cbar.set_position([x0+.95, 0.75, .02, len(lst)*.0005])
else:
g = sns.clustermap(celltype_ad_genes, figsize=(6,6), linewidth=0.05, cmap=cmap, col_cluster=True, dendrogram_ratio=0.1, row_cluster=True, vmin=-1, vmax=1, center= 0, linecolor= 'white', clip_on=False,
xticklabels=True, yticklabels=True, square=False,cbar_kws={"orientation": "vertical","label": "coefficient estimates"})
x0, _y0, _w, _h = g.cbar_pos
#g.ax_cbar.set_position([x0+.9, 0.3, g.ax_row_dendrogram.get_position().width, 0.5])
g.ax_cbar.set_position([x0+.95, 0.6, .02, .25])
#g.ax_cbar.set_title('coefficient estimates')
g.ax_cbar.tick_params(axis='x', length=10)
ax = g.ax_heatmap
#ax.title("age association per cell type")
ax.set_ylabel("")
ax.set_xlabel("")
#ax.set_title("age association per cell type")
#g.fig.suptitle('age association per cell type')
st.pyplot(g)
disp_table(celltype_ad_genes)
st.dataframe(celltype_ad_genes)
else:
st.write('**Got Empty Data Set (aging.glmmtmb_age_diffs_fdr.csv), Please select a different set of genes**')
######END NEW CODE
#with readme:
else:
expander = st.expander("How to use this app")
#st.header('How to use this app')
expander.markdown('This app consists of 3-Tabs')
expander.markdown('**Tab1: Gene Expression by CellType:** Two dropdown lists are provided where user can select a gene and celltype of interest. After making selection, Please press Go Button')
expander.markdown('**Tabe2: Age assosciation for Multiple genes: ** A multiselect drop down list is provided to select geens of interest. A dotplot showing expression per cell type for the selected genes is shown on the left. Right plot shows age association per cell type as clustermap for the selected genes.')
expander.markdown('**Tab3: Age assosciations with GoTerms: Here user can select a pathway database from a dropdown list of pathway databaass. Once a pathway database is selected (after pressing Update button), another drowdown list shows all pathwys(GoTerms) for the selected database. After selecting a pathway of choice, Please press Show Pathway Results button.')
expander.markdown('**README: This tab**')
expander1 = st.expander('Introduction')
expander1.markdown(
""" Coming Soon.
"""
)
expander1.markdown('Coming soon')
expander1.markdown('- ')
expander1.markdown('- ')
expander1.markdown('- ')
expander1.markdown('Coming soon')
# CSS: set the font size of the labels inside st.tabs headers to 1.5rem.
# NOTE(review): the st.tabs calls in this file are commented out in favour of
# st.radio, so this rule may no longer match any element - confirm before
# removing it.
css = '''
<style>
.stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
font-size:1.5rem;
}
</style>
'''
st.markdown(css, unsafe_allow_html=True)