Martin Proks
commited on
chore: port streamlit-portal to huggingface spaces
Browse files- Home.py +49 -0
- README.md +11 -8
- __init__.py +0 -0
- constants.py +26 -0
- pages/1_Gene_Expression.py +61 -0
- pages/2_Differentially_Expressed_Genes.py +72 -0
- pages/3_SHAP_features.py +58 -0
- pages/4_Download.py +40 -0
- pages/__init__.py +0 -0
- prepare.ipynb +0 -0
- requirements.txt +3 -0
- static/Fig-1.v4.3.png +0 -0
- utils.py +164 -0
Home.py
ADDED
@@ -0,0 +1,49 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
import streamlit as st
|
3 |
+
|
4 |
+
# Landing page of the portal: renders the manuscript title, author list,
# abstract and the overview figure.
st.set_page_config(layout="wide")

# Raw string literal: the Markdown contains literal backslashes (``\*``) which
# are not valid Python escape sequences in a regular string.
st.markdown(
    r"""
# Deep Learning Based Models for Preimplantation Mouse and Human Embryos Based on Single Cell RNA Sequencing

_[Martin Proks](https://orcid.org/0000-0002-8178-3128)\*,
[Nazmus Salehin](https://orcid.org/0000-0002-8155-4296)\*,
[Joshua M. Brickman](https://orcid.org/0000-0003-1580-7491)**_

_\* These authors contributed equally to the work_

_** Corresponding author [[email protected]](mailto:[email protected])_

The rapid growth of single-cell transcriptomic technology has produced an increasing number of
datasets for both embryonic development and _in vitro_ pluripotent stem cell derived models.
This avalanche of data surrounding pluripotency and the process of lineage specification has
meant it has become increasingly difficult to define specific cell types or states and compare
these to _in vitro_ differentiation. Here we utilize a set of deep learning (DL) tools to
integrate and classify multiple datasets. This allows for the definition of both mouse and
human embryo cell types, lineages and states, thereby maximising the information one can garner
from these precious experimental resources. Our approaches are built on recent initiatives for
large scale human organ atlases, but here we focus on the difficult to obtain and process
material that spans early mouse and human development. We deploy similar approaches as the
initiatives building large reference organ atlases, however with a focus on early mammalian
development. Using publicly available data for these stages, we test different deep learning
approaches and develop a model to classify cell types in an unbiased fashion at the same time as
defining the set of genes used by the model to identify lineages, cell types and states. We have
used our models trained on _in vivo_ development to classify pluripotent stem cell models for
both mouse and human development, showcasing the importance of this resource as a dynamic
reference for early embryogenesis.
"""
)

# Overview figure shipped with the app under ``static/``.
st.image(
    "static/Fig-1.v4.3.png",
    caption="""
Summary of datasets used to build reference models. a) Schematic overview of mouse
and human preimplantation development. b) Quantification of cells per publication which
were collected for building the mouse (grey) and human (black) reference. c) Computational
schematic of tools used to build and interpret the reference models. d) Gene expression
of canonical markers for each developmental stage in mouse (top) and human (bottom)
preimplantation. e) Reduced dimensional representation of preimplantation mouse (left)
and human (right) datasets. dpf: days post fertilization, E: embryonic day.
""",
)
|
README.md
CHANGED
@@ -1,12 +1,15 @@
|
|
1 |
---
|
2 |
-
title: Preimplantation
|
3 |
-
emoji: 🏆
|
4 |
-
colorFrom: green
|
5 |
-
colorTo: purple
|
6 |
sdk: streamlit
|
7 |
-
sdk_version: 1.
|
8 |
-
app_file:
|
9 |
-
pinned: false
|
10 |
---
|
11 |
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: Preimplantation portal
|
|
|
|
|
|
|
3 |
sdk: streamlit
|
4 |
+
sdk_version: 1.38.0
|
5 |
+
app_file: Home.py
|
|
|
6 |
---
|
7 |
|
8 |
+
# preimplantation portal
|
9 |
+
|
10 |
+
## Run locally
|
11 |
+
|
12 |
+
```bash
|
13 |
+
source venv/bin/activate
|
14 |
+
streamlit run Home.py
|
15 |
+
```
|
__init__.py
ADDED
File without changes
|
constants.py
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Version tag interpolated into the portal file names below.
# NOTE(review): this is a float, so the f-strings render it via ``str(float)``;
# a value such as 1.10 would be rendered as "1.1". Consider a string if the
# versioning scheme ever needs trailing zeros.
VERSION = 1.5

# Default ``obsm`` key and ``obs`` column pre-selected on the gene expression page.
DEFAULT_DR = "X_draw_graph_fa"
DEFAULT_META = "stage"
# Base URL all resource links below are built from.
ZENODO_URL = "https://zenodo.org/records/13749348/files"

# Per-species resource URLs:
#   RAW_DATASET - link offered on the download page
#   DATASET     - AnnData file loaded by the gene expression page
#   DEGS        - differentially expressed gene tables, keyed by grouping (CT / STAGE)
#   SHAP        - SHAP feature table
DATA = {
    "HUMAN": {
        "RAW_DATASET": f"{ZENODO_URL}/32_human_adata.h5ad",
        "DATASET": f"{ZENODO_URL}/portal_human_v{VERSION}.h5ad",
        "DEGS": {
            "CT": f"{ZENODO_URL}/human_degs_ct_v{VERSION}.feather",
            "STAGE": f"{ZENODO_URL}/human_degs_stage_v{VERSION}.feather"
        },
        "SHAP": f"{ZENODO_URL}/human_SHAP_v{VERSION}.feather",
    },
    "MOUSE": {
        "RAW_DATASET": f"{ZENODO_URL}/01_mouse_reprocessed.h5ad",
        "DATASET": f"{ZENODO_URL}/portal_mouse_v{VERSION}.h5ad",
        "DEGS": {
            "CT": f"{ZENODO_URL}/mouse_degs_ct_v{VERSION}.feather",
            "STAGE": f"{ZENODO_URL}/mouse_degs_stage_v{VERSION}.feather",
        },
        "SHAP": f"{ZENODO_URL}/mouse_SHAP_v{VERSION}.feather",
    },
}
|
pages/1_Gene_Expression.py
ADDED
@@ -0,0 +1,61 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
import anndata
|
3 |
+
import streamlit as st
|
4 |
+
|
5 |
+
from constants import DATA, DEFAULT_DR, DEFAULT_META
|
6 |
+
from utils import fetch_resource, plot_feature, plot_sc_embedding
|
7 |
+
|
8 |
+
# Gene expression page: pick a species dataset, an embedding, a metadata
# column and a gene, then show two embedding plots and a per-group box plot.
st.set_page_config(layout="wide")
st.markdown("""
# Gene expression

Levels of gene activity along differentiation.
""")

ds = st.sidebar.selectbox(
    "**Load dataset**",
    DATA.keys(),
    index=None,
    placeholder="Select dataset ...",
)

if ds is not None:

    adata = anndata.read_h5ad(fetch_resource(DATA[ds]["DATASET"]))

    # Pre-select the configured defaults, but fall back to the first entry
    # instead of crashing when a dataset does not provide them.
    dr_keys = adata.obsm_keys()
    sl_dr = st.sidebar.selectbox(
        "**Dimension reduction**",
        dr_keys,
        index=dr_keys.index(DEFAULT_DR) if DEFAULT_DR in dr_keys else 0,
        placeholder="Select method ...",
    )

    meta_columns = list(adata.obs.columns)
    sl_metadata = st.sidebar.selectbox(
        "**Metadata**",
        meta_columns,
        index=meta_columns.index(DEFAULT_META) if DEFAULT_META in meta_columns else 0,
        placeholder="Select column ...",
    )

    sl_feature = st.sidebar.selectbox(
        "**Gene**",
        adata.raw.var_names,
        index=0,
        placeholder="Select gene ...",
    )

    # Denoised values are only looked up for genes present in ``adata.var_names``.
    is_imputed = sl_feature in adata.var_names
    sl_denoised = st.sidebar.checkbox(
        "Use denoised expression?",
        help="Denoised expression is sampled from the decoder.",
        disabled=(not is_imputed),
    )
    # A checkbox ticked for a previous gene may still report True after it is
    # disabled (presumably Streamlit keeps the widget state - TODO confirm), so
    # never request the denoised layer for a gene that lacks it.
    use_denoised = bool(sl_denoised and is_imputed)

    col1, col2 = st.columns(2)
    plot_sc_embedding(adata, group_by=sl_metadata, reduction_key=sl_dr, ax=col1)
    plot_sc_embedding(
        adata, feature=sl_feature, reduction_key=sl_dr, layer=use_denoised, ax=col2
    )

    st.markdown("## Raw gene expression")
    plot_feature(adata, feature=sl_feature, group_by=sl_metadata, kind="box")
|
pages/2_Differentially_Expressed_Genes.py
ADDED
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
import pandas as pd
|
3 |
+
import streamlit as st
|
4 |
+
|
5 |
+
from constants import DATA
|
6 |
+
from utils import fetch_resource
|
7 |
+
|
8 |
+
# Differentially expressed genes page: load a marker table for the chosen
# species/grouping and filter it by group, gene, fold-change and p-value.
st.set_page_config(layout="wide")

st.markdown("""
# Differentially expressed genes

Genes below have been determined using `sc.tl.rank_genes_groups` where `t-test_overestim_var`
is the default method.
"""
)

ds = st.sidebar.selectbox(
    "**Select models**",
    DATA.keys(),
    index=None,
    placeholder="Select species",
)

# Defined up front so the name is always bound, even before a species is picked.
filter_by = None
if ds:
    filter_by = st.sidebar.selectbox(
        "**Select by**",
        DATA[ds]["DEGS"].keys(),
        index=None,
        placeholder="Select by",
    )

if ds and filter_by:
    markers = pd.read_feather(fetch_resource(DATA[ds]["DEGS"][filter_by]))

    group = st.sidebar.multiselect(
        "**Cell type**", markers.group.unique(), placeholder="Select group ..."
    )

    genes = st.sidebar.multiselect(
        "**Gene**", markers.gene_symbol.unique(), placeholder="Select genes ..."
    )

    # Float default: an integer default would restrict the widget to whole
    # numbers, making thresholds such as 0.5 or 1.5 impossible to enter.
    foldchange = st.sidebar.number_input(
        "**Log2 fold-change**",
        value=1.0,
        step=0.1,
    )

    pval = st.sidebar.number_input(
        "**Adjusted p-value**",
        min_value=0.0,
        max_value=1.0,
        value=0.05,
        step=0.01,
        format="%.3f",
    )

    # Each entry is a ``DataFrame.query`` clause; ``@name`` refers to the
    # local variable of that name.
    filter_flag = []

    if group:
        filter_flag.append("group == @group")

    if genes:
        filter_flag.append("gene_symbol in @genes")

    # A threshold of 0 disables the fold-change filter; a positive value keeps
    # genes above it, a negative value keeps genes below it.
    if foldchange:
        filter_flag.append(
            "logfoldchanges > @foldchange"
            if foldchange > 0
            else "logfoldchanges < @foldchange"
        )

    # A threshold of 0 disables the p-value filter.
    if pval:
        filter_flag.append("pvals_adj < @pval")

    subset = markers.query(" & ".join(filter_flag)) if filter_flag else markers
    st.dataframe(subset, use_container_width=True, height=650)
|
pages/3_SHAP_features.py
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
import pandas as pd
|
3 |
+
import streamlit as st
|
4 |
+
|
5 |
+
from constants import DATA
|
6 |
+
from utils import fetch_resource
|
7 |
+
|
8 |
+
# SHAP features page: show the SHAP feature table of the selected species,
# optionally restricted to one cell type and/or a set of genes.
st.set_page_config(layout="wide")

# Raw string literal: ``\mu`` and ``\sigma`` are LaTeX commands, not Python
# escape sequences.
st.markdown(
    r"""
# SHAP features

Predicted features (genes) used by the scANVI classifier to determine a cell type. The features
have been determined using [SHAP](https://shap.readthedocs.io/en/latest/).

Each metric for a feature is determined from 10 random bootstraps with replacement.

- weight_mean: $\mu$ of SHAP value
- weight_std: $\sigma$ of SHAP value
- weight_ci_upper: $\mu$ + $\sigma$
- weight_ci_lower: $\mu$ - $\sigma$
- logfoldchanges: Log2 fold change from differential expression analysis
- pvals_adj: Adjusted p-value from differential expression analysis
- scores: Estimated score from differential expression analysis
"""
)

ds = st.sidebar.selectbox(
    "**Load dataset**",
    DATA.keys(),
    index=None,
    placeholder="Select dataset ...",
)

if ds:
    data = pd.read_feather(fetch_resource(DATA[ds]["SHAP"]))

    query = st.sidebar.selectbox(
        "**Subset**",
        data.ct.unique().tolist(),
        index=None,
        placeholder="Select cell type ...",
    )

    features = st.sidebar.multiselect(
        "**Genes**", data.feature.unique(), placeholder="Select genes ..."
    )

    # Each entry is a ``DataFrame.query`` clause; ``@name`` refers to the
    # local variable of that name.
    filter_condition = []
    if query:
        filter_condition.append("ct == @query")
    if features:
        filter_condition.append("feature in @features")

    if filter_condition:
        data = data.query(" & ".join(filter_condition))

    st.dataframe(data, use_container_width=True, height=650)
|
pages/4_Download.py
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python
|
2 |
+
import streamlit as st
|
3 |
+
|
4 |
+
from constants import DATA
|
5 |
+
|
6 |
+
# Download page: static Markdown listing pipelines, code repositories,
# raw datasets and trained models.
st.set_page_config(layout="wide")

# f-string: the raw dataset links are interpolated from ``constants.DATA``.
st.markdown(
    f"""
# Download

## 1. Pipelines

- Downloading datasets: [nf-core/fetchngs (revision 1.10.0)](https://github.com/nf-core/fetchngs)
- Aligning datasets: [brickmanlab/scrnaseq (revision: feature/smartseq)](https://github.com/brickmanlab/scrnaseq)
- **Ensembl Genomes**
- Mouse: GRCm38 v102
- Human: GRCh38 v110

## 2. Codebase

- Data analysis: [brickmanlab/proks-salehin-et-al](https://github.com/brickmanlab/proks-salehin-et-al)
- Web portal: [brickmanlab/preimplantation-portal](https://github.com/brickmanlab/preimplantation-portal)

## 3. Raw data

- [Mouse]({DATA['MOUSE']['RAW_DATASET']})
- [Human]({DATA['HUMAN']['RAW_DATASET']})

## 4. AI models

Trained models with parameters were uploaded to [Hugging Face](https://huggingface.co/brickmanlab/preimplantation-models).

### 4.1 Models

- [scANVI mouse](https://huggingface.co/brickmanlab/mouse-scanvi)
- [scANVI human](https://huggingface.co/brickmanlab/human-scanvi)

"""
)
|
pages/__init__.py
ADDED
File without changes
|
prepare.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
requirements.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
anndata==0.10.9
|
2 |
+
plotly==5.24.0
|
3 |
+
pyarrow==17.0.0
|
static/Fig-1.v4.3.png
ADDED
![]() |
utils.py
ADDED
@@ -0,0 +1,164 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import socket
|
2 |
+
import urllib.request
|
3 |
+
from pathlib import Path
|
4 |
+
from typing import Literal
|
5 |
+
|
6 |
+
import anndata
|
7 |
+
import pandas as pd
|
8 |
+
import plotly.express as px
|
9 |
+
import streamlit as st
|
10 |
+
|
11 |
+
|
12 |
+
@st.cache_data
def fetch_resource(url: str) -> str:
    """Download a remote file once and return its local path.

    The file is stored under ``/tmp`` using the last path component of the
    URL as file name. If that file already exists, nothing is downloaded.

    Parameters
    ----------
    url : str
        Zenodo url link

    Returns
    -------
    str
        Path where the file was downloaded to, default /tmp

    Raises
    ------
    ConnectionError
        If the download fails with a name-resolution or URL error.
    """

    filename = f"/tmp/{url.split('/')[-1]}"
    if not Path(filename).exists():
        # Download to a side file and rename it once complete, so that an
        # interrupted transfer never leaves a truncated file at the final
        # path (which would be mistaken for a valid download next time).
        partial = Path(f"{filename}.part")
        try:
            urllib.request.urlretrieve(url, str(partial))
            partial.replace(filename)
        except (socket.gaierror, urllib.error.URLError) as err:
            partial.unlink(missing_ok=True)
            raise ConnectionError(f"could not download {url} due to {err}") from err

    return filename
|
35 |
+
|
36 |
+
|
37 |
+
def get_embedding(adata: anndata.AnnData, key: str) -> pd.DataFrame:
    """
    Helper function which retrieves embedding coordinates for each cell.

    Only the first two dimensions of the embedding are returned. Columns are
    named ``<NAME>_1`` and ``<NAME>_2`` where ``<NAME>`` is the upper-cased
    key with a leading ``X_`` removed (when present).

    Parameters
    ----------
    adata : anndata.AnnData
        scrna-seq dataset
    key : str
        Dimension reduction key, usually starts with X_

    Returns
    -------
    pd.DataFrame
        Embedding coordinates

    Raises
    ------
    ValueError
        Fail if reduction key doesn't exist
    """
    if key not in adata.obsm.keys():
        raise ValueError(f"Reduction key: {key} not available")

    # Strip the prefix only when it is really there; blindly dropping the
    # first two characters would mangle keys that do not start with "X_".
    label = (key[2:] if key.startswith("X_") else key).upper()
    dimension_names = [f"{label}_1", f"{label}_2"]
    return pd.DataFrame(adata.obsm[key][:, :2], columns=dimension_names)
|
63 |
+
|
64 |
+
|
65 |
+
def plot_sc_embedding(
    adata: anndata.AnnData,
    reduction_key: str,
    group_by: str = None,
    feature: str = None,
    layer: str = None,
    ax=None,
):
    """
    Plot single-cell dataset as a 2D scatter of the chosen embedding.

    Cells are colored by ``feature`` when given, otherwise by ``group_by``
    when given, otherwise left uncolored.

    Parameters
    ----------
    adata : anndata.AnnData
        scrna-seq dataset
    reduction_key : str
        Reduced space key
    group_by : str
        Key used to color cells
    feature : str
        Gene
    layer : str or bool
        When falsy, expression is read from ``adata.raw``. When truthy,
        expression is read from a layer of ``adata``: the layer of that name
        if a string is given, otherwise ``"scVI_normalized"``.
    ax : _type_
        Object exposing ``plotly_chart`` (e.g. a Streamlit column); the
        ``streamlit`` module itself is used when omitted.
    """
    embeddings = get_embedding(adata, reduction_key)
    kwargs = {}

    # Attach all per-cell columns while the rows are still in ``adata`` order.
    # Assigning a bare array after sorting would pair values with wrong cells.
    if group_by:
        embeddings[group_by] = adata.obs[group_by].values

    if feature:
        if layer:
            layer_name = layer if isinstance(layer, str) else "scVI_normalized"
            X = adata[:, feature].layers[layer_name]
        else:
            X = adata.raw[:, feature].X
        # Sparse matrices need densifying; dense arrays are used as they are.
        if hasattr(X, "toarray"):
            X = X.toarray()
        embeddings[feature] = X.ravel()

    if group_by:
        embeddings = embeddings.sort_values(by=group_by)
        kwargs = {"color": embeddings[group_by].values.tolist()}
        # Non-categorical metadata gets a continuous color scale.
        if adata.obs[group_by].dtype != "category":
            kwargs["color_continuous_scale"] = px.colors.sequential.Viridis

    # Feature coloring takes precedence over metadata coloring.
    if feature:
        kwargs = {
            "color": embeddings[feature].values.tolist(),
            "color_continuous_scale": px.colors.sequential.Viridis,
        }

    ax_ = ax if ax is not None else st
    ax_.plotly_chart(
        px.scatter(
            data_frame=embeddings,
            x=embeddings.columns[0],
            y=embeddings.columns[1],
            **kwargs,
        ),
        use_container_width=True,
    )
|
128 |
+
|
129 |
+
|
130 |
+
def plot_feature(
    adata: anndata.AnnData,
    feature: str,
    group_by: str,
    kind: Literal["box"] = "box",
    ax=None,
):
    """Plot feature expression grouped by a metadata column.

    Expression values are read from ``adata.raw``.

    Parameters
    ----------
    adata : anndata.AnnData
        Dataset
    feature : str
        Gene name
    group_by : str
        Metadata column
    kind : str
        Type of plot, only ``"box"`` is supported
    ax : _type_, optional
        Object exposing ``plotly_chart``, by default None (the ``streamlit``
        module is used)

    Raises
    ------
    ValueError
        If ``kind`` is not supported
    """
    # Validate first so an unsupported kind fails before any data is touched.
    if kind != "box":
        raise ValueError(f"Provided kind: {kind} not supported")

    expression = adata.raw[:, feature].X
    # Sparse matrices need densifying; dense arrays are used as they are.
    if hasattr(expression, "toarray"):
        expression = expression.toarray()

    df = pd.DataFrame(expression, columns=[feature])
    df[group_by] = adata.obs[group_by].values
    df = df.sort_values(by=group_by)

    g = px.box(df, x=group_by, y=feature, color=group_by)

    ax_ = ax if ax is not None else st
    ax_.plotly_chart(g, use_container_width=True)
|