maom committed on
Commit
57a3fc8
·
verified ·
1 Parent(s): f23e50d

initial chart

Browse files
Files changed (1) hide show
  1. app.py +66 -0
app.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import altair as alt
import datasets
import numpy as np
import pandas as pd
import streamlit as st
5
+
6
# Streamlit app: scatter plot of the estimated expression of two genes across
# RNA-seq runs, colored by study.

st.set_page_config(layout='wide')

st.markdown("""
# CryptoCEN Expression Scatter
**CryptoCEN** is a co-expression network for *Cryptococcus neoformans* built on 1,524 RNA-seq runs across 34 studies.
A pair of genes are said to be co-expressed when their expression is correlated across different conditions and
is often a marker for genes to be involved in similar processes.
To Cite:
MJ O'Meara, JR Rapala, CB Nichols, C Alexandre, B Billmyre, JL Steenwyk, A Alspaugh,
TR O'Meara CryptoCEN: A Co-Expression Network for Cryptococcus neoformans reveals
novel proteins involved in DNA damage repair
* Code available at https://github.com/maomlab/CalCEN/tree/master/vignettes/CryptoCEN
* Full network and dataset: https://huggingface.co/datasets/maomlab/CryptoCEN

## Look up top-coexpressed partners:
Put in the ``CNAG_#####`` gene_id for a gene and expand the table to get the top 50 co-expressed genes.
``coexp_score`` ranges between ``[0-1]``, where ``1`` is the best and greater than ``0.85`` can be considered significant.
""")

# Expression table: the lookups below treat rows as genes and columns as runs.
estimated_expression = datasets.load_dataset(
    path="maomlab/CryptoCEN",
    data_files={"estimated_expression": "estimated_expression.tsv"})
estimated_expression = estimated_expression["estimated_expression"].to_pandas()

# Per-run metadata; must provide "run_accession" and "study_accession" columns.
# NOTE(review): this path has three components while the table above is loaded
# from "maomlab/CryptoCEN" -- confirm the repository id / file location.
estimated_expression_meta = datasets.load_dataset(
    path="maomlab/CryptoCEN/Data",
    data_files={"estimated_expression_meta": "estimated_expression_meta.tsv"})
estimated_expression_meta = estimated_expression_meta["estimated_expression_meta"].to_pandas()

# Two narrow input columns on the left; the third column is left empty.
col1, col2, col3 = st.columns(spec=[0.3, 0.2, 0.5])
with col1:
    gene_id_1 = st.text_input(
        label="Gene ID 1",
        value="CNAG_04365",
        max_chars=10,
        help="CNAG Gene ID e.g. CNAG_04365")

with col2:
    gene_id_2 = st.text_input(
        label="Gene ID 2",
        value="CNAG_04222",
        max_chars=10,
        help="CNAG Gene ID e.g. CNAG_04222")


def _expression_for_gene(gene_id):
    """Return the expression values of ``gene_id`` across runs as a 1-D array.

    Shows an error in the page and stops the script run when the gene id is
    not present, instead of failing later while building the chart data.
    """
    # Assumes gene ids are the row labels of the expression table, as the
    # original lookup did -- TODO confirm against the dataset layout.
    matches = estimated_expression.loc[estimated_expression.index == gene_id]
    if matches.empty:
        st.error(f"Gene ID '{gene_id}' was not found in the expression data.")
        st.stop()
    # First matching row, flattened so it can serve as a DataFrame column.
    return matches.iloc[0].to_numpy()


# One row per run. Plain arrays are used so the columns are combined by
# position rather than aligned on differing pandas indexes.
chart_data = pd.DataFrame({
    "expression_1": _expression_for_gene(gene_id_1),
    "expression_2": _expression_for_gene(gene_id_2),
    "run_accession": estimated_expression.columns.to_numpy(),
    "run_accession_meta": estimated_expression_meta["run_accession"].to_numpy(),
    "study_accession": estimated_expression_meta["study_accession"].to_numpy(),
})

# Sanity check printed to the server log: number of runs whose accession in
# the expression table matches the metadata at the same position.
n_matching_runs = int(
    (chart_data["run_accession"] == chart_data["run_accession_meta"]).sum())
print(f"run_ids are equal: {n_matching_runs}")

chart = (
    alt.Chart(chart_data)
    .mark_circle()
    .encode(
        x="expression_1",
        y="expression_2",
        # A constant visual property has to be wrapped in alt.value().
        size=alt.value(5),
        color="study_accession",
        tooltip=["run_accession", "study_accession"]))

st.altair_chart(chart, use_container_width=True)