Spaces:
Sleeping
Sleeping
File size: 5,574 Bytes
2999286 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 |
#!/usr/bin/env python
# coding: utf-8

# # Multi omics analysis by MOFA and GLUE
# MOFA is a factor analysis model that provides a general framework for the
# integration of multi-omic data sets in an unsupervised fashion.
#
# Most of the time, however, we do not get paired cells in a multi-omics
# analysis. Here, we can pair cells using GLUE, a dimensionality reduction
# algorithm that can efficiently integrate data from different omics layers.
#
# This tutorial focuses on how to perform MOFA on multi-omics data using GLUE.
#
# Paper: [MOFA+: a statistical framework for comprehensive integration of multi-modal single-cell data](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02015-1) and [Multi-omics single-cell data integration and regulatory inference with graph-linked embedding](https://www.nature.com/articles/s41587-022-01284-4)
#
# Code: https://github.com/bioFAM/mofapy2 and https://github.com/gao-lab/GLUE
#
# Colab_Reproducibility: https://colab.research.google.com/drive/1zlakFf20IoBdyuOQDocwFQHu8XOVizRL?usp=sharing
#
# We used the resulting anndata objects `rna-emb.h5ad` and `atac-emb.h5ad` from
# [GLUE's tutorial](https://scglue.readthedocs.io/en/latest/training.html)

# In[1]:


import omicverse as ov

ov.utils.ov_plot_set()


# ## Load the data
#
# We use `ov.utils.read` to read the `h5ad` files

# In[2]:


rna = ov.utils.read("chen_rna-emb.h5ad")
atac = ov.utils.read("chen_atac-emb.h5ad")


# ## Pair the omics
#
# Each cell in our rna and atac data has a feature vector, X_glue, based on
# which we can calculate the Pearson correlation coefficient to perform cell
# matching.

# In[3]:


pair_obj = ov.single.GLUE_pair(rna, atac)
pair_obj.correlation()


# We counted the top 50 highly correlated cells in the other omics layer for
# each cell in one of the omics layers, to avoid missing data due to one cell
# being highly correlated with multiple cells. The default minimum threshold
# for high correlation is 0.9.
# We can obtain more paired cells by increasing the depth, but note that
# increasing the depth may lead to higher errors in cell matching.

# In[4]:


import os

res_pair = pair_obj.find_neighbor_cell(depth=20)
# `models/` may not exist on a fresh checkout; create it so that this write
# and the MOFA model written further down do not fail.
os.makedirs('models', exist_ok=True)
res_pair.to_csv('models/chen_pair_res.csv')


# We filter to get paired cells

# In[14]:


# Subsetting an AnnData returns a view. Copy explicitly before reassigning the
# obs index instead of relying on the implicit copy-on-modify of views.
rna1 = rna[res_pair['omic_1']].copy()
atac1 = atac[res_pair['omic_2']].copy()
# Give both modalities the same obs index so that row i is the same pair.
rna1.obs.index = res_pair.index
atac1.obs.index = res_pair.index
rna1, atac1


# We can use mudata to store the multi-omics

# In[6]:


from mudata import MuData

mdata = MuData({'rna': rna1, 'atac': atac1})
mdata


# In[7]:


mdata.write("chen_mu.h5mu", compression='gzip')


# ## MOFA prepare
#
# In the MOFA analysis, we only need to use highly variable genes, so we
# filter the features once more

# In[22]:


rna1 = mdata['rna']
rna1 = rna1[:, rna1.var['highly_variable'] == True].copy()
atac1 = mdata['atac']
atac1 = atac1[:, atac1.var['highly_variable'] == True].copy()
rna1.obs.index = res_pair.index
atac1.obs.index = res_pair.index


# In[23]:


import random

# `random.sample` raises ValueError when asked for more items than exist, so
# cap the sample at the number of paired cells actually found.
paired_cells = list(rna1.obs.index)
random_obs_index = random.sample(paired_cells, min(5000, len(paired_cells)))


# In[25]:


from sklearn.metrics import adjusted_rand_score as ari

# Agreement of the cell-type labels between the paired RNA and ATAC cells,
# on the random subset and on all pairs.
ari_random = ari(rna1[random_obs_index].obs['cell_type'],
                 atac1[random_obs_index].obs['cell_type'])
ari_raw = ari(rna1.obs['cell_type'], atac1.obs['cell_type'])
print(f'raw ari:{ari_raw}, random ari:{ari_random}')


# In[26]:


# Optional: continue with the random subset only.
#rna1=rna1[random_obs_index]
#atac1=atac1[random_obs_index]


# ## MOFA analysis
#
# In this part, we construct a MOFA model from scRNA-seq and scATAC-seq

# In[28]:


test_mofa = ov.single.pyMOFA(omics=[rna1, atac1],
                             omics_name=['RNA', 'ATAC'])


# In[29]:


test_mofa.mofa_preprocess()
test_mofa.mofa_run(outfile='models/chen_rna_atac.hdf5')


# ## MOFA Visualization
#
# In this part, we provide a series of functions to visualize the result of
# mofa.
# In[30]:


pymofa_obj = ov.single.pyMOFAART(model_path='models/chen_rna_atac.hdf5')


# In[31]:


pymofa_obj.get_factors(rna1)
rna1


# ### Visualize the variance of each view

# In[32]:


pymofa_obj.plot_r2()


# In[33]:


pymofa_obj.get_r2()


# ### Visualize the correlation between factor and celltype

# In[37]:


# The obs column used for grouping and the factor pair shown in the 2-D
# plots are shared by several calls below, so name them once.
group_key = 'cell_type'
factor_pair = {'factor1': 1, 'factor2': 3}

pymofa_obj.plot_cor(rna1, group_key, figsize=(4, 6))


# In[38]:


pymofa_obj.get_cor(rna1, group_key)


# In[46]:


pymofa_obj.plot_factor(rna1, group_key, 'Ast', figsize=(3, 3), **factor_pair)


# ### Visualize the factor in UMAP
#
# To visualize the GLUE’s learned embeddings, we use the pymde package wrapper
# in scvi-tools. This is an alternative to UMAP that is GPU-accelerated.
#
# You can use `sc.tl.umap` instead.

# In[41]:


from scvi.model.utils import mde
import scanpy as sc

sc.pp.neighbors(rna1, use_rep="X_glue", metric="cosine")
glue_embedding = rna1.obsm["X_glue"]
rna1.obsm["X_mde"] = mde(glue_embedding)


# In[47]:


embedding_colors = ["factor1", "factor3", group_key]
sc.pl.embedding(
    rna1,
    basis="X_mde",
    color=embedding_colors,
    frameon=False,
    ncols=3,
    #palette=ov.utils.pyomic_palette(),
    show=False,
    cmap='Greens',
    vmin=0,
)


# ### Weights ranked
# A visualization of factor weights familiar to MOFA and MOFA+ users is
# implemented with some modifications in `plot_weight_gene_d1`,
# `plot_weight_gene_d2`, and `plot_weights`.

# In[48]:


pymofa_obj.plot_weight_gene_d1(view='RNA', **factor_pair)


# In[50]:


pymofa_obj.plot_weights(view='RNA', factor=1, ascending=False)


# ### Weights heatmap
#
# While trying to annotate factors, a global overview of top features defining
# them could be helpful.

# In[51]:


pymofa_obj.plot_top_feature_heatmap(view='RNA')


# In[ ]: