#!/usr/bin/env python
# coding: utf-8

# # Multi omics analysis by MOFA
# MOFA is a factor analysis model that provides a general framework for the integration of multi-omic data sets in an unsupervised fashion.
# 
# This tutorial shows how to run MOFA on multi-omics data such as scRNA-seq and scATAC-seq.
# 
# Paper: [MOFA+: a statistical framework for comprehensive integration of multi-modal single-cell data](https://genomebiology.biomedcentral.com/articles/10.1186/s13059-020-02015-1)
# 
# Code: https://github.com/bioFAM/mofapy2
# 
# Colab reproducibility: https://colab.research.google.com/drive/1UPGQA3BenrC-eLIGVtdKVftSnOKIwNrP?usp=sharing

# ## Part.1 MOFA Model
# In this part, we build a MOFA model from scRNA-seq and scATAC-seq data.

# In[1]:


import omicverse as ov
# Read the RNA and ATAC `.h5ad` inputs that the MOFA model is built from.
rna_path = 'data/sample/rna_p_n_raw.h5ad'
atac_path = 'data/sample/atac_p_n_raw.h5ad'
rna = ov.utils.read(rna_path)
atac = ov.utils.read(atac_path)


# In[2]:


# Bare expression: a notebook renders both objects as the cell output;
# when run as a plain script this line has no visible effect.
rna, atac


# We only need to pass the AnnData objects to `ov.single.pyMOFA` to construct the base model

# In[4]:


# Wrap both modalities in a pyMOFA object, passing a view name for each.
# NOTE(review): `omics_name` is presumably matched to `omics` by position — confirm.
test_mofa = ov.single.pyMOFA(
    omics=[rna, atac],
    omics_name=['RNA', 'ATAC'],
)


# In[ ]:


# Preprocess the inputs, then run MOFA with the HDF5 path below as `outfile`.
test_mofa.mofa_preprocess()
model_outfile = 'models/brac_rna_atac.hdf5'
test_mofa.mofa_run(outfile=model_outfile)


# ## Part.2 MOFA Analysis
# After fitting the MOFA model, we need to analyze its factors across the different omics layers. We provide several methods to do this.

# ### load data

# In[1]:


import omicverse as ov
# Configure omicverse's plot settings before any figure is drawn
# (NOTE(review): exact effect of `ov_plot_set` not visible here — confirm).
ov.utils.ov_plot_set()


# In[2]:


# Read the RNA data used by the analysis and visualization sections below.
rna_test_path = 'data/sample/rna_test.h5ad'
rna = ov.utils.read(rna_test_path)


# ### Add the factor values to the AnnData object

# In[3]:


# Rebind `rna` to the result of `factor_exact`, which is given the saved
# MOFA model file; the trailing bare name displays it in a notebook.
mofa_model_path = 'data/sample/MOFA_POS.hdf5'
rna = ov.single.factor_exact(rna, hdf5_path=mofa_model_path)
rna


# ### analysis of the correlation between factor and cell type

# In[4]:


# Correlate factors 1-5 with the `cell_type` annotation of `rna`.
ov.single.factor_correlation(
    adata=rna,
    cluster='cell_type',
    factor_list=list(range(1, 6)),
)


# ### Get the gene/feature weights of a given factor

# In[5]:


# Look up the weights of factor 1 in the 'RNA' view of the saved model file.
ov.single.get_weights(
    hdf5_path='data/sample/MOFA_POS.hdf5',
    view='RNA',
    factor=1,
)


# ## Part.3 MOFA Visualize
# 
# To visualize the MOFA results, we provide a series of plotting functions.

# In[6]:


# Create the helper object used by every plot below from the saved model file.
pymofa_obj = ov.single.pyMOFAART(
    model_path='data/sample/MOFA_POS.hdf5',
)


# First, we get the factor values of each cell

# In[7]:


# The return value is discarded and `rna` is displayed right after, so
# `get_factors` presumably annotates `rna` in place — TODO confirm.
pymofa_obj.get_factors(rna)
rna


# We can also plot the variance explained in each view

# In[8]:


# Draw the R2 plot for the loaded model.
pymofa_obj.plot_r2()


# In[9]:


# Fetch the R2 values themselves (presumably the numbers behind the plot
# above); as the last expression of the cell they are shown in a notebook.
pymofa_obj.get_r2()


# ### Visualize the correlation between factor and celltype

# In[10]:


# Plot the factor correlation against the `cell_type` annotation of `rna`.
pymofa_obj.plot_cor(rna, 'cell_type')


# We found that factor6 is correlated with epithelial cells (`Epi`)

# In[11]:


# Plot factor 6 against factor 10 for the 'Epi' entry of `cell_type`.
# NOTE(review): presumably 'Epi' is the group that gets highlighted — confirm
# against the `plot_factor` signature.
pymofa_obj.plot_factor(
    rna, 'cell_type', 'Epi',
    figsize=(3, 3),
    factor1=6,
    factor2=10,
)


# In[24]:


import scanpy as sc

# Compute the neighbor graph and the UMAP embedding that is plotted below.
sc.pp.neighbors(rna)
sc.tl.umap(rna)

# Two panels side by side: the `factor6` values and the `cell_type` labels.
# `show=False` keeps the figure from being shown immediately, so it can
# still be saved afterwards.
# Left disabled in the original notebook:
#   - a `palette=ov.utils.pyomic_palette()` argument for this call;
#   - a trailing `plt.savefig("figures/umap_factor6.png", dpi=300,
#     bbox_inches='tight')` (would need `matplotlib.pyplot` imported as `plt`).
sc.pl.embedding(
    rna,
    basis="X_umap",
    color=["factor6", "cell_type"],
    ncols=2,
    frameon=False,
    cmap='Greens',
    vmin=0,
    show=False,
)


# In[12]:


# Plot the 'RNA' view weights for factors 6 and 10.
pymofa_obj.plot_weight_gene_d1(
    view='RNA',
    factor1=6,
    factor2=10,
)


# In[18]:


# Plot the 'RNA' view weights of factor 6 in green, with `ascending=True`.
pymofa_obj.plot_weights(
    view='RNA',
    factor=6,
    color='#5de25d',
    ascending=True,
)


# In[14]:


# Draw the top-feature heatmap for the 'RNA' view.
pymofa_obj.plot_top_feature_heatmap(
    view='RNA',
)


# In[ ]: