#!/usr/bin/env python
# coding: utf-8

# # Spatial integration and clustering
# 
# STAligner is designed for alignment and integration of spatially resolved transcriptomics data.
# 
# STAligner first normalizes the expression profiles of all spots and constructs a spatial neighbor network from the spatial coordinates. It then employs a graph attention auto-encoder neural network to extract spatially aware embeddings, and constructs spot triplets based on the current embeddings to guide the alignment process by attracting similar spots and discriminating dissimilar spots across slices. STAligner introduces a triplet loss to update the spot embeddings, reducing the distance from the anchor to the positive spot and increasing the distance from the anchor to the negative spot. The triplet construction and auto-encoder training are optimized iteratively until batch-corrected embeddings are generated. STAligner can be applied to integrate ST datasets, achieving alignment and simultaneous identification of spatial domains from different biological samples (see the figure below), including different technological platforms (I), developmental (embryonic) stages (II), disease conditions (III), and consecutive slices of a tissue for 3D slice alignment (IV).
# 
# Zhou, X., Dong, K. & Zhang, S. Integrating spatial transcriptomics data across different conditions, technologies and developmental stages. Nat Comput Sci 3, 894–906 (2023). https://doi.org/10.1038/s43588-023-00528-w
# 
# ![image.png](attachment:00790548-59f9-4fad-a1e3-a52c3ae98d44.png)

# In[1]:


from scipy.sparse import csr_matrix
import omicverse as ov
import scanpy as sc
import anndata as ad
import pandas as pd
import os

ov.utils.ov_plot_set()


# # Preprocess data
# 
# Here, we use the mouse olfactory bulb data generated by Stereo-seq and Slide-seqV2. The processed Stereo-seq and Slide-seqV2 data can be downloaded from https://drive.google.com/drive/folders/1Omte1adVFzyRDw7VloOAQYwtv_NjdWcG?usp=share_link, and the original tutorials can be found at https://staligner.readthedocs.io/en/latest
# 
# A critical point must be clarified: STAligner calculates highly variable genes before concatenating the AnnData samples. Therefore, the number of highly variable genes should not be set too low; otherwise, when there are many samples, the downstream features available for STAligner training would be insufficient, hurting the model's performance.
# 
# When using STAligner, it is necessary to adjust the **rad_cutoff** parameter to the data so that each spot has an **average of 5-10 adjacent spots** connected to it, e.g. "11.3356 neighbors per cell on average."
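# Before running the preprocessing below, you can verify this neighbor count yourself. The following is a minimal, optional sketch (not part of the original workflow); it assumes `ov.space.Cal_Spatial_Net` stores the spatial network as a binary scipy sparse adjacency matrix in `adata.uns['adj']`, as noted in the next cell.

def check_avg_neighbors(adata, key='adj'):
    """Hypothetical helper: print the average number of spatial neighbors per spot."""
    adj = adata.uns[key]  # assumed: binary sparse adjacency from Cal_Spatial_Net
    # For a binary adjacency matrix, total edges divided by the number of spots
    # gives the average degree, i.e. the average number of neighbors per spot.
    avg = adj.sum() / adata.n_obs
    print(f'{avg:.4f} neighbors per cell on average')
    return avg

# Hypothetical usage: re-run Cal_Spatial_Net with a larger or smaller rad_cutoff
# until check_avg_neighbors(adata) falls roughly within the 5-10 range.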
# In[2]:


Batch_list = []
adj_list = []
section_ids = ['Slide-seqV2_MoB', 'Stereo-seq_MoB']
print(section_ids)
pathway = '/storage/zengjianyangLab/hulei/scRNA-seq/scripts/STAligner'

for section_id in section_ids:
    print(section_id)
    adata = sc.read_h5ad(os.path.join(pathway, section_id+".h5ad"))

    # check whether adata.X is a sparse matrix
    if isinstance(adata.X, pd.DataFrame):
        adata.X = csr_matrix(adata.X)
    else:
        pass

    adata.var_names_make_unique(join="++")

    # make spot names unique
    adata.obs_names = [x+'_'+section_id for x in adata.obs_names]

    # Constructing the spatial network
    ov.space.Cal_Spatial_Net(adata, rad_cutoff=50) # the spatial network is saved in adata.uns['adj']

    # Normalization
    sc.pp.highly_variable_genes(adata, flavor="seurat_v3", n_top_genes=10000)
    sc.pp.normalize_total(adata, target_sum=1e4)
    sc.pp.log1p(adata)

    adata = adata[:, adata.var['highly_variable']]
    adj_list.append(adata.uns['adj'])
    Batch_list.append(adata)


# In[3]:


Batch_list


# In[4]:


adata_concat = ad.concat(Batch_list, label="slice_name", keys=section_ids)
adata_concat.obs["batch_name"] = adata_concat.obs["slice_name"].astype('category')
print('adata_concat.shape: ', adata_concat.shape)


# # Training STAligner model
# 
# Here, we use `ov.space.pySTAligner` to construct a STAligner object and train the model.
# 
# We use the `train_STAligner_subgraph` function from STAligner to reduce GPU memory usage: each slice is treated as a subgraph for training.

# In[5]:


get_ipython().run_cell_magic('time', '', "# iter_comb is used to specify the order of integration. For example, (0, 1) means slice 0 will be aligned with slice 1 as the reference.\niter_comb = [(i, i + 1) for i in range(len(section_ids) - 1)]\n\n# Here, to reduce GPU memory usage, each slice is considered as a subgraph for training.\nSTAligner_obj = ov.space.pySTAligner(adata_concat, verbose=True, knn_neigh = 100, n_epochs = 600, iter_comb = iter_comb,\n                                     batch_key = 'batch_name',  key_added='STAligner', Batch_list = Batch_list)\n")


# In[6]:


STAligner_obj.train()


# We stored the latent embedding in `adata.obsm['STAligner']`.

# In[7]:


adata = STAligner_obj.predicted()


# # Clustering the space
# 
# We can use `GMM`, `leiden` or `louvain` to cluster the space:
# 
# `ov.utils.cluster(adata,use_rep='STAligner',method='GMM',n_components=7,covariance_type='full', tol=1e-9, max_iter=1000, random_state=3607)`
# 
# or
# 
# `sc.pp.neighbors(adata, use_rep='STAligner', random_state=666)`
# `ov.utils.cluster(adata,use_rep='STAligner',method='leiden',resolution=1)`

# In[8]:


sc.pp.neighbors(adata, use_rep='STAligner', random_state=666)
ov.utils.cluster(adata,use_rep='STAligner',method='leiden',resolution=0.4)
sc.tl.umap(adata, random_state=666)
sc.pl.umap(adata, color=['batch_name',"leiden"],wspace=0.5)
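# As an optional sanity check (not part of the original tutorial): if the integration worked, most `leiden` clusters should contain spots from both platforms. A quick way to inspect this is a batch-by-cluster contingency table with pandas, which is already imported above.

# Count spots per (batch, cluster) combination; well-mixed clusters should have
# non-trivial counts in both rows.
cluster_by_batch = pd.crosstab(adata.obs['batch_name'], adata.obs['leiden'])
print(cluster_by_batch)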
# We can also map the clustering results back to the original spatial coordinates to obtain spatially specific clustering results.

# In[9]:


import matplotlib.pyplot as plt

spot_size = 50
title_size = 15

fig, ax = plt.subplots(1, 2, figsize=(6, 3), gridspec_kw={'wspace': 0.05, 'hspace': 0.2})
_sc_0 = sc.pl.spatial(adata[adata.obs['batch_name'] == 'Slide-seqV2_MoB'], img_key=None, color=['leiden'], title=['Slide-seqV2'],
                      legend_fontsize=10, show=False, ax=ax[0], frameon=False, spot_size=spot_size, legend_loc=None)
_sc_0[0].set_title('Slide-seqV2', size=title_size)
_sc_1 = sc.pl.spatial(adata[adata.obs['batch_name'] == 'Stereo-seq_MoB'], img_key=None, color=['leiden'], title=['Stereo-seq'],
                      legend_fontsize=10, show=False, ax=ax[1], frameon=False, spot_size=spot_size)
_sc_1[0].set_title('Stereo-seq', size=title_size)
_sc_1[0].invert_yaxis()
plt.show()


# In[ ]: