feat: move from streamlit to Hugging Face Spaces
Browse files- .gitignore +3 -0
- README.md +4 -3
- constants.py +3 -23
- pages/1_Gene_Expression.py +11 -14
- pages/2_Differentially_Expressed_Genes.py +41 -46
- pages/3_SHAP_features.py +41 -31
- pages/4_Download.py +25 -16
- utils.py +98 -5
.gitignore
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
.ruff_cache
|
2 |
+
.venv
|
3 |
+
__pycache__
|
README.md
CHANGED
@@ -1,15 +1,16 @@
|
|
1 |
---
|
2 |
title: Preimplantation portal
|
|
|
3 |
sdk: streamlit
|
4 |
sdk_version: 1.38.0
|
5 |
app_file: Home.py
|
6 |
---
|
7 |
|
8 |
-
# preimplantation portal
|
9 |
-
|
10 |
## Run locally
|
11 |
|
12 |
```bash
|
13 |
-
source venv/bin/activate
|
|
|
|
|
14 |
streamlit run Home.py
|
15 |
```
|
|
|
1 |
---
|
2 |
title: Preimplantation portal
|
3 |
+
short_description: Preimplantation mouse and human development
|
4 |
sdk: streamlit
|
5 |
sdk_version: 1.38.0
|
6 |
app_file: Home.py
|
7 |
---
|
8 |
|
|
|
|
|
9 |
## Run locally
|
10 |
|
11 |
```bash
|
12 |
+
source .venv/bin/activate
|
13 |
+
# install dependencies
|
14 |
+
uv pip install -r requirements.txt
|
15 |
streamlit run Home.py
|
16 |
```
|
constants.py
CHANGED
@@ -1,26 +1,6 @@
|
|
1 |
-
|
|
|
|
|
2 |
|
3 |
DEFAULT_DR = "X_draw_graph_fa"
|
4 |
DEFAULT_META = "stage"
|
5 |
-
ZENODO_URL = "https://zenodo.org/records/13749348/files"
|
6 |
-
|
7 |
-
DATA = {
|
8 |
-
"HUMAN": {
|
9 |
-
"RAW_DATASET": f"{ZENODO_URL}/32_human_adata.h5ad",
|
10 |
-
"DATASET": f"{ZENODO_URL}/portal_human_v{VERSION}.h5ad",
|
11 |
-
"DEGS": {
|
12 |
-
"CT": f"{ZENODO_URL}/human_degs_ct_v{VERSION}.feather",
|
13 |
-
"STAGE": f"{ZENODO_URL}/human_degs_stage_v{VERSION}.feather"
|
14 |
-
},
|
15 |
-
"SHAP": f"{ZENODO_URL}/human_SHAP_v{VERSION}.feather",
|
16 |
-
},
|
17 |
-
"MOUSE": {
|
18 |
-
"RAW_DATASET": f"{ZENODO_URL}/01_mouse_reprocessed.h5ad",
|
19 |
-
"DATASET": f"{ZENODO_URL}/portal_mouse_v{VERSION}.h5ad",
|
20 |
-
"DEGS": {
|
21 |
-
"CT": f"{ZENODO_URL}/mouse_degs_ct_v{VERSION}.feather",
|
22 |
-
"STAGE": f"{ZENODO_URL}/mouse_degs_stage_v{VERSION}.feather",
|
23 |
-
},
|
24 |
-
"SHAP": f"{ZENODO_URL}/mouse_SHAP_v{VERSION}.feather",
|
25 |
-
},
|
26 |
-
}
|
|
|
1 |
+
# Constants
|
2 |
+
|
3 |
+
MODELS = {"HUMAN": ["v1.0.1", "main"], "MOUSE": ["v1.0.1", "main"]}
|
4 |
|
5 |
DEFAULT_DR = "X_draw_graph_fa"
|
6 |
DEFAULT_META = "stage"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pages/1_Gene_Expression.py
CHANGED
@@ -1,9 +1,8 @@
|
|
1 |
#!/usr/bin/env python
|
2 |
-
import anndata
|
3 |
import streamlit as st
|
4 |
|
5 |
-
from constants import
|
6 |
-
from utils import fetch_resource, plot_feature, plot_sc_embedding
|
7 |
|
8 |
st.set_page_config(layout="wide")
|
9 |
st.markdown("""
|
@@ -12,16 +11,10 @@ st.markdown("""
|
|
12 |
Levels of gene activity along differentiation.
|
13 |
""")
|
14 |
|
15 |
-
|
16 |
-
"**Load dataset**",
|
17 |
-
DATA.keys(),
|
18 |
-
index=None,
|
19 |
-
placeholder="Select contact method...",
|
20 |
-
)
|
21 |
|
22 |
-
if
|
23 |
-
|
24 |
-
adata = anndata.read_h5ad(fetch_resource(DATA[ds]['DATASET']))
|
25 |
|
26 |
sl_dr = st.sidebar.selectbox(
|
27 |
"**Dimension reduction**",
|
@@ -30,6 +23,10 @@ if ds is not None:
|
|
30 |
placeholder="Select method ...",
|
31 |
)
|
32 |
|
|
|
|
|
|
|
|
|
33 |
sl_metadata = st.sidebar.selectbox(
|
34 |
"**Metadata**",
|
35 |
adata.obs.columns,
|
@@ -39,7 +36,7 @@ if ds is not None:
|
|
39 |
|
40 |
sl_feature = st.sidebar.selectbox(
|
41 |
"**Gene**",
|
42 |
-
adata.raw.var_names,
|
43 |
index=0,
|
44 |
placeholder="Select gene ...",
|
45 |
)
|
@@ -48,7 +45,7 @@ if ds is not None:
|
|
48 |
sl_denoised = st.sidebar.checkbox(
|
49 |
"Use denoised expression?",
|
50 |
help="Denoised expression is sampled from the decoder.",
|
51 |
-
disabled=(not is_imputed)
|
52 |
)
|
53 |
|
54 |
col1, col2 = st.columns(2)
|
|
|
1 |
#!/usr/bin/env python
|
|
|
2 |
import streamlit as st
|
3 |
|
4 |
+
from constants import DEFAULT_DR, DEFAULT_META
|
5 |
+
from utils import fetch_resource, plot_feature, plot_sc_embedding, ui_model_selection
|
6 |
|
7 |
st.set_page_config(layout="wide")
|
8 |
st.markdown("""
|
|
|
11 |
Levels of gene activity along differentiation.
|
12 |
""")
|
13 |
|
14 |
+
ui_model_selection()
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
+
if st.session_state["SPECIE"] and st.session_state["VERSION"]:
|
17 |
+
adata = fetch_resource(st.session_state["SPECIE"], st.session_state["VERSION"])
|
|
|
18 |
|
19 |
sl_dr = st.sidebar.selectbox(
|
20 |
"**Dimension reduction**",
|
|
|
23 |
placeholder="Select method ...",
|
24 |
)
|
25 |
|
26 |
+
st.sidebar.markdown(
|
27 |
+
f"Visualization done on `{adata.uns['neighbors']['params']['use_rep']}` space."
|
28 |
+
)
|
29 |
+
|
30 |
sl_metadata = st.sidebar.selectbox(
|
31 |
"**Metadata**",
|
32 |
adata.obs.columns,
|
|
|
36 |
|
37 |
sl_feature = st.sidebar.selectbox(
|
38 |
"**Gene**",
|
39 |
+
sorted(adata.raw.var_names),
|
40 |
index=0,
|
41 |
placeholder="Select gene ...",
|
42 |
)
|
|
|
45 |
sl_denoised = st.sidebar.checkbox(
|
46 |
"Use denoised expression?",
|
47 |
help="Denoised expression is sampled from the decoder.",
|
48 |
+
disabled=(not is_imputed),
|
49 |
)
|
50 |
|
51 |
col1, col2 = st.columns(2)
|
pages/2_Differentially_Expressed_Genes.py
CHANGED
@@ -1,9 +1,7 @@
|
|
1 |
#!/usr/bin/env python
|
2 |
-
import pandas as pd
|
3 |
import streamlit as st
|
4 |
|
5 |
-
from
|
6 |
-
from utils import fetch_resource
|
7 |
|
8 |
st.set_page_config(layout="wide")
|
9 |
|
@@ -12,61 +10,58 @@ st.markdown("""
|
|
12 |
|
13 |
Genes below have been determined using `sc.tl.rank_genes_groups` where `t-test_overestim_var`
|
14 |
is the default method.
|
15 |
-
"""
|
16 |
-
|
|
|
17 |
|
18 |
filter_flag = []
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
)
|
25 |
-
|
26 |
-
if ds:
|
27 |
-
filter_by = st.sidebar.selectbox(
|
28 |
"**Select by**",
|
29 |
-
|
30 |
index=None,
|
31 |
-
placeholder="
|
32 |
)
|
33 |
|
34 |
-
if
|
35 |
-
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
|
55 |
-
|
56 |
-
|
57 |
|
58 |
-
|
59 |
-
|
60 |
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
|
68 |
-
|
69 |
-
|
70 |
|
71 |
-
|
72 |
-
|
|
|
1 |
#!/usr/bin/env python
|
|
|
2 |
import streamlit as st
|
3 |
|
4 |
+
from utils import fetch_resource, get_degs, ui_model_selection
|
|
|
5 |
|
6 |
st.set_page_config(layout="wide")
|
7 |
|
|
|
10 |
|
11 |
Genes below have been determined using `sc.tl.rank_genes_groups` where `t-test_overestim_var`
|
12 |
is the default method.
|
13 |
+
""")
|
14 |
+
|
15 |
+
ui_model_selection()
|
16 |
|
17 |
filter_flag = []
|
18 |
+
|
19 |
+
if st.session_state["SPECIE"] and st.session_state["VERSION"]:
|
20 |
+
adata = fetch_resource(st.session_state["SPECIE"], st.session_state["VERSION"])
|
21 |
+
|
22 |
+
degs_by = st.sidebar.selectbox(
|
|
|
|
|
|
|
|
|
23 |
"**Select by**",
|
24 |
+
[x for x in adata.uns_keys() if "degs" in x],
|
25 |
index=None,
|
26 |
+
placeholder="Differentially expressed genes by",
|
27 |
)
|
28 |
|
29 |
+
if degs_by:
|
30 |
+
degs = get_degs(adata, degs_by)
|
31 |
|
32 |
+
group = st.sidebar.multiselect(
|
33 |
+
"**Cell type**", degs["group"].unique(), placeholder="Select group ..."
|
34 |
+
)
|
35 |
|
36 |
+
genes = st.sidebar.multiselect(
|
37 |
+
"**Gene**", sorted(degs["names"].unique()), placeholder="Select genes ..."
|
38 |
+
)
|
39 |
|
40 |
+
foldchange = st.sidebar.number_input(
|
41 |
+
"**Log2 fold-change**",
|
42 |
+
value=1,
|
43 |
+
)
|
44 |
|
45 |
+
pval_adj = st.sidebar.number_input(
|
46 |
+
"**Adjusted p-value**",
|
47 |
+
value=0.05,
|
48 |
+
)
|
49 |
|
50 |
+
if group:
|
51 |
+
filter_flag.append("group == @group")
|
52 |
|
53 |
+
if genes:
|
54 |
+
filter_flag.append("@genes in names")
|
55 |
|
56 |
+
if foldchange:
|
57 |
+
filter_flag.append(
|
58 |
+
"logfoldchanges > @foldchange"
|
59 |
+
if foldchange > 0
|
60 |
+
else "logfoldchanges < @foldchange"
|
61 |
+
)
|
62 |
|
63 |
+
if pval_adj:
|
64 |
+
filter_flag.append("pvals_adj < @pval_adj")
|
65 |
|
66 |
+
subset = degs.query(" & ".join(filter_flag)) if filter_flag else degs
|
67 |
+
st.dataframe(subset, use_container_width=True, height=650)
|
pages/3_SHAP_features.py
CHANGED
@@ -1,9 +1,7 @@
|
|
1 |
#!/usr/bin/env python
|
2 |
-
import pandas as pd
|
3 |
import streamlit as st
|
4 |
|
5 |
-
from
|
6 |
-
from utils import fetch_resource
|
7 |
|
8 |
st.set_page_config(layout="wide")
|
9 |
|
@@ -22,37 +20,49 @@ st.markdown("""
|
|
22 |
- logfoldchanges: Log2fold change from differentiation expression analysis
|
23 |
- pvals_adj: Adjusted p-value from differentiation expression analysis
|
24 |
- scores: Estimated score from differentiation expression analysis
|
25 |
-
"""
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
query = st.sidebar.selectbox(
|
39 |
-
"**Subset**",
|
40 |
-
data.ct.unique().tolist(),
|
41 |
index=None,
|
42 |
-
placeholder="Select
|
43 |
)
|
44 |
|
45 |
-
|
46 |
-
"
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
filter_condition.append("feature in @features")
|
54 |
|
55 |
-
|
56 |
-
|
57 |
|
58 |
-
|
|
|
1 |
#!/usr/bin/env python
|
|
|
2 |
import streamlit as st
|
3 |
|
4 |
+
from utils import fetch_resource, ui_model_selection
|
|
|
5 |
|
6 |
st.set_page_config(layout="wide")
|
7 |
|
|
|
20 |
- logfoldchanges: Log2fold change from differentiation expression analysis
|
21 |
- pvals_adj: Adjusted p-value from differentiation expression analysis
|
22 |
- scores: Estimated score from differentiation expression analysis
|
23 |
+
""")
|
24 |
+
|
25 |
+
ui_model_selection()
|
26 |
+
|
27 |
+
filter_condition = []
|
28 |
+
|
29 |
+
if st.session_state["SPECIE"] and st.session_state["VERSION"]:
|
30 |
+
adata = fetch_resource(st.session_state["SPECIE"], st.session_state["VERSION"])
|
31 |
+
|
32 |
+
explainer = st.sidebar.selectbox(
|
33 |
+
"**Explainer**",
|
34 |
+
adata.uns["explainer"].keys(),
|
|
|
|
|
|
|
|
|
35 |
index=None,
|
36 |
+
placeholder="Select explainer ...",
|
37 |
)
|
38 |
|
39 |
+
if explainer:
|
40 |
+
shap_values = adata.uns["explainer"][explainer].pop("shap_values").reset_index()
|
41 |
+
params = [f"{k}:\t{v}" for k, v in adata.uns["explainer"][explainer].items()]
|
42 |
+
|
43 |
+
st.sidebar.markdown("**Parameters**")
|
44 |
+
for k, v in adata.uns["explainer"][explainer].items():
|
45 |
+
st.sidebar.markdown(f"{k}:\t{v}")
|
46 |
+
|
47 |
+
celltype = st.sidebar.selectbox(
|
48 |
+
"**Cell type**",
|
49 |
+
adata.obs.ct.cat.categories,
|
50 |
+
index=None,
|
51 |
+
placeholder="Select cell type ...",
|
52 |
+
)
|
53 |
+
|
54 |
+
features = st.sidebar.multiselect(
|
55 |
+
"**Genes**",
|
56 |
+
sorted(shap_values.feature.unique()),
|
57 |
+
placeholder="Select genes ...",
|
58 |
+
)
|
59 |
|
60 |
+
if celltype:
|
61 |
+
filter_condition.append("ct == @celltype")
|
62 |
+
if features:
|
63 |
+
filter_condition.append("feature in @features")
|
|
|
64 |
|
65 |
+
if filter_condition:
|
66 |
+
shap_values = shap_values.query(" & ".join(filter_condition))
|
67 |
|
68 |
+
st.dataframe(shap_values, use_container_width=True, height=650)
|
pages/4_Download.py
CHANGED
@@ -1,40 +1,49 @@
|
|
1 |
#!/usr/bin/env python
|
2 |
import streamlit as st
|
3 |
|
4 |
-
from constants import
|
5 |
|
6 |
st.set_page_config(layout="wide")
|
7 |
|
8 |
st.markdown(
|
9 |
-
|
10 |
-
# Download
|
11 |
|
12 |
-
## 1.
|
13 |
|
14 |
- Downloading datasets: [nf-core/fetchngs (revision 1.10.0)](https://github.com/nf-core/fetchngs)
|
15 |
- Aligning datasets: [brickmanlab/scrnaseq (revision: feature/smartseq)](https://github.com/brickmanlab/scrnaseq)
|
16 |
-
- **Ensembl Genomes**
|
17 |
- Mouse: GRCm38 v102
|
18 |
- Human: GRCh38 v110
|
19 |
|
20 |
## 2. Codebase
|
21 |
|
22 |
- Data analysis: [brickmanlab/proks-salehin-et-al](https://github.com/brickmanlab/proks-salehin-et-al)
|
23 |
-
- Web portal: [brickmanlab/preimplantation-portal](https://
|
|
|
|
|
|
|
24 |
|
25 |
-
|
26 |
|
27 |
-
-
|
28 |
-
|
|
|
29 |
|
30 |
-
## 4.
|
31 |
|
32 |
Trained models with parameters were uploaded to [Hugging Face](https://huggingface.co/brickmanlab/preimplantation-models).
|
33 |
-
|
34 |
-
### 4.1 Models
|
35 |
-
|
36 |
-
- [scANVI mouse](https://huggingface.co/brickmanlab/mouse-scanvi)
|
37 |
-
- [scANVI human](https://huggingface.co/brickmanlab/human-scanvi)
|
38 |
-
|
39 |
"""
|
40 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
#!/usr/bin/env python
|
2 |
import streamlit as st
|
3 |
|
4 |
+
from constants import MODELS
|
5 |
|
6 |
st.set_page_config(layout="wide")
|
7 |
|
8 |
st.markdown(
|
9 |
+
"""
|
10 |
+
# Download & Credits
|
11 |
|
12 |
+
## 1. Preprocessing pipelines
|
13 |
|
14 |
- Downloading datasets: [nf-core/fetchngs (revision 1.10.0)](https://github.com/nf-core/fetchngs)
|
15 |
- Aligning datasets: [brickmanlab/scrnaseq (revision: feature/smartseq)](https://github.com/brickmanlab/scrnaseq)
|
16 |
+
- **Ensembl Genomes (models < v1.0.1)**
|
17 |
- Mouse: GRCm38 v102
|
18 |
- Human: GRCh38 v110
|
19 |
|
20 |
## 2. Codebase
|
21 |
|
22 |
- Data analysis: [brickmanlab/proks-salehin-et-al](https://github.com/brickmanlab/proks-salehin-et-al)
|
23 |
+
- Web portal on HF: [brickmanlab/hf-preimplantation-portal](https://huggingface.co/spaces/brickmanlab/hf-preimplantation-portal/tree/main)
|
24 |
+
- Web portal (deprecated): [brickmanlab/preimplantation-portal](https://github.com/brickmanlab/preimplantation-portal)
|
25 |
+
|
26 |
+
## 3. Raw and normalized counts
|
27 |
|
28 |
+
Raw counts are stored in `layers['counts']` and normalized counts are stored in `.X`.
|
29 |
|
30 |
+
- models < v1.0.1
|
31 |
+
- [mouse](https://zenodo.org/records/13749348/files/01_mouse_reprocessed.h5ad)
|
32 |
+
- [human](https://zenodo.org/records/13749348/files/32_human_adata.h5ad)
|
33 |
|
34 |
+
## 4. scVI/scANVI models
|
35 |
|
36 |
Trained models with parameters were uploaded to [Hugging Face](https://huggingface.co/brickmanlab/preimplantation-models).
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
"""
|
38 |
)
|
39 |
+
|
40 |
+
# Render one markdown bullet per species, listing a link to every published
# model version. Joining the links avoids the trailing ", " that previously
# had to be sliced off (and which mangled the bullet when a species had no
# versions).
lines = []
for specie, versions in MODELS.items():
    links = ", ".join(
        f"[{version}](https://huggingface.co/brickmanlab/{specie.lower()}-scanvi/tree/{version})"
        for version in versions
    )
    lines.append(f"- **{specie}**: {links}\n")
text = "".join(lines)
st.markdown(text)
|
utils.py
CHANGED
@@ -8,9 +8,41 @@ import pandas as pd
|
|
8 |
import plotly.express as px
|
9 |
import streamlit as st
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
@st.cache_data
|
13 |
-
def
|
14 |
"""Helper function for downloading datasets
|
15 |
|
16 |
Parameters
|
@@ -24,14 +56,45 @@ def fetch_resource(url: str) -> str:
|
|
24 |
Path where the file was downloaded to, default /tmp
|
25 |
"""
|
26 |
|
27 |
-
|
28 |
-
if not
|
|
|
|
|
|
|
29 |
try:
|
30 |
-
urllib.request.urlretrieve(url,
|
31 |
except (socket.gaierror, urllib.error.URLError) as err:
|
32 |
raise ConnectionError(f"could not download {url} due to {err}")
|
33 |
|
34 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
|
37 |
def get_embedding(adata: anndata.AnnData, key: str) -> pd.DataFrame:
|
@@ -162,3 +225,33 @@ def plot_feature(
|
|
162 |
|
163 |
ax_ = ax if ax else st
|
164 |
ax_.plotly_chart(g, use_container_width=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
import plotly.express as px
|
9 |
import streamlit as st
|
10 |
|
11 |
+
from constants import MODELS
|
12 |
+
|
13 |
+
|
14 |
+
def ui_model_selection():
    """Render the species/version pickers in the sidebar and remember the choice.

    The selection is kept in ``st.session_state`` under the ``"SPECIE"`` and
    ``"VERSION"`` keys so it is shared between pages. The stored values are
    only overwritten once both a species and a version have been picked.
    """

    # shared state variables between pages
    if "SPECIE" not in st.session_state:
        st.session_state["SPECIE"] = None
    if "VERSION" not in st.session_state:
        st.session_state["VERSION"] = None

    species = list(MODELS)
    saved_specie = st.session_state["SPECIE"]
    specie = st.sidebar.selectbox(
        "**Species**",
        species,
        # Pre-select the remembered species; fall back to "nothing selected"
        # when there is none (or it is no longer listed in MODELS).
        index=species.index(saved_specie) if saved_specie in species else None,
        placeholder="Supported species",
    )

    # Bound up-front so the check below never depends on the branch being taken.
    version = None
    if specie:
        versions = MODELS[specie]
        saved_version = st.session_state["VERSION"]
        version = st.sidebar.selectbox(
            "**Version**",
            versions,
            # The remembered version may not exist for the species that was
            # just selected; `.index()` would raise ValueError in that case.
            index=versions.index(saved_version) if saved_version in versions else None,
            placeholder="Version",
        )

    st.sidebar.divider()

    if specie and version:
        st.session_state["SPECIE"] = specie
        st.session_state["VERSION"] = version
|
42 |
+
|
43 |
|
44 |
@st.cache_data
|
45 |
+
def _fetch_resource(url: str, filename: str) -> str:
|
46 |
"""Helper function for downloading datasets
|
47 |
|
48 |
Parameters
|
|
|
56 |
Path where the file was downloaded to, default /tmp
|
57 |
"""
|
58 |
|
59 |
+
destination = Path(f"/tmp/{filename}")
|
60 |
+
if not filename:
|
61 |
+
raise ValueError("Filename not specified!")
|
62 |
+
|
63 |
+
if not destination.exists():
|
64 |
try:
|
65 |
+
urllib.request.urlretrieve(url, destination)
|
66 |
except (socket.gaierror, urllib.error.URLError) as err:
|
67 |
raise ConnectionError(f"could not download {url} due to {err}")
|
68 |
|
69 |
+
return destination.as_posix()
|
70 |
+
|
71 |
+
|
72 |
+
def fetch_resource(specie: str, version: str) -> anndata.AnnData:
    """Load H5AD dataset from Hugging Face (https://huggingface.co/brickmanlab)

    Parameters
    ----------
    specie : str
        Specie, must be a key of ``MODELS``
    version : str
        Model version, must be listed in ``MODELS[specie]``

    Returns
    -------
    anndata.AnnData
        Annotated dataset

    Raises
    ------
    ValueError
        Specie and Version have to exist
    """

    # `or`, not `and`: with `and` an unknown specie fell through to
    # `MODELS[specie]` (KeyError), and a known specie with an unknown version
    # was never rejected. Short-circuiting keeps the lookup safe.
    if specie not in MODELS or version not in MODELS[specie]:
        raise ValueError(f"Provided {specie} and {version} are not present on Hugging Face models!")

    url: str = f"https://huggingface.co/brickmanlab/{specie.lower()}-scanvi/resolve/{version}/adata.h5ad"
    return anndata.read_h5ad(_fetch_resource(url, filename=f"{specie.lower()}_v{version}.h5ad"))
|
98 |
|
99 |
|
100 |
def get_embedding(adata: anndata.AnnData, key: str) -> pd.DataFrame:
|
|
|
225 |
|
226 |
ax_ = ax if ax else st
|
227 |
ax_.plotly_chart(g, use_container_width=True)
|
228 |
+
|
229 |
+
|
230 |
+
def get_degs(adata: anndata.AnnData, key: str) -> pd.DataFrame:
    """Format DEGs to a dataframe.

    Code taken from https://github.com/scverse/scanpy/blob/1.10.4/src/scanpy/get/get.py#L27-L111

    Parameters
    ----------
    adata : anndata.AnnData
        Annotated dataset; the results are read from ``adata.uns[key]``
    key : str
        Key used to store the degs

    Returns
    -------
    pd.DataFrame
        Dataframe of differentially expressed genes, one row per
        (group, gene) pair, ordered by group and then by the original rank
    """

    result = adata.uns[key]
    # Group labels are the field names of the structured "names" array.
    groups = list(result["names"].dtype.names)
    fields = ["names", "scores", "logfoldchanges", "pvals", "pvals_adj"]

    # One wide table (rows = rank, columns = groups) per statistic.
    per_field = []
    for field in fields:
        per_field.append(pd.DataFrame(result[field])[groups])

    wide = pd.concat(per_field, axis=1, names=[None, "group"], keys=fields)

    # Move the group level from the columns into the rows.
    long = wide.stack(level=1).reset_index()
    long["group"] = pd.Categorical(long["group"], categories=groups)

    # "level_0" is the original row position; it is only needed for ordering.
    long = long.sort_values(["group", "level_0"])
    return long.drop(columns="level_0")
|