File size: 448 Bytes
8a7f15e |
1 2 3 4 5 6 7 8 9 10 11 |
# Download the RNAcentral active-sequence FASTA and reduce it to cluster
# representatives with MMseqs2.
#
# Requires: wget, gunzip, mmseqs (MMseqs2) on PATH.
# Produces: rnacentral_active.fasta       (decompressed download)
#           rnacentral_clustered.fasta    (one representative per cluster)
# plus the intermediate MMseqs2 databases rnacentral_db, rnacentral_cluster
# and rnacentral_rep in the current directory.

# -O takes the output filename as its argument, so the filename must come
# before the URL; without it wget consumes the URL as the filename and aborts
# with "missing URL".
wget -O rnacentral_active.fasta.gz ftp://ftp.ebi.ac.uk/pub/databases/RNAcentral/current_release/sequences/rnacentral_active.fasta.gz
# Replaces the .gz archive with rnacentral_active.fasta.
gunzip rnacentral_active.fasta.gz
# deduplication (mmseqs2 required)
# Convert the FASTA file into an MMseqs2 sequence database.
mmseqs createdb rnacentral_active.fasta rnacentral_db
# Cluster at a minimum sequence identity of 0.8 and coverage of 0.8;
# "tmp" is the scratch directory MMseqs2 uses for intermediate files.
mmseqs cluster rnacentral_db rnacentral_cluster tmp --min-seq-id 0.8 -c 0.8
# Keep only the representative sequence of each cluster.
mmseqs createsubdb rnacentral_cluster rnacentral_db rnacentral_rep
# Export the representatives back to FASTA.
mmseqs convert2fasta rnacentral_rep rnacentral_clustered.fasta
# Remove the MMseqs2 scratch directory.
rm -rf tmp
|