File size: 1,439 Bytes
0b11a42 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import functools
import typing as ty
import pandas as pd
import RNA
@functools.lru_cache()
def duplex_energy(s1: str, s2: str) -> float:
    """Return the energy reported by ``RNA.duplexfold`` for two sequences.

    Results are memoized with ``functools.lru_cache`` (default cache size),
    keyed on the ``(s1, s2)`` argument pair, so argument order matters for
    cache hits.

    Parameters
    ----------
    s1 : str
        First sequence passed to ``RNA.duplexfold``.
    s2 : str
        Second sequence passed to ``RNA.duplexfold``.

    Returns
    -------
    float
        The ``energy`` attribute of the duplex fold result
        (presumably kcal/mol, per ViennaRNA convention -- confirm).
    """
    duplex = RNA.duplexfold(s1, s2)
    return duplex.energy
@functools.lru_cache()
def folded_sequence(sequence, model_details):
    """Fold a single sequence and return the result of ``fold_compound.mfe()``.

    Results are memoized with ``functools.lru_cache`` keyed on both
    arguments, so ``model_details`` must be hashable.
    NOTE(review): whether two distinct but equally configured
    ``model_details`` objects share a cache entry depends on how that type
    implements hashing/equality -- confirm before relying on cache hits
    across calls.

    Parameters
    ----------
    sequence
        The sequence handed to ``RNA.fold_compound``.
    model_details
        The model-details object handed to ``RNA.fold_compound``.

    Returns
    -------
    tuple
        The two values unpacked from ``mfe()``, in order: the structure
        (presumably in dot-bracket notation -- confirm against ViennaRNA
        docs) and its minimum free energy.
    """
    compound = RNA.fold_compound(sequence, model_details)
    # mfe() is expected to yield exactly two values; unpacking enforces that.
    structure, energy = compound.mfe()
    return structure, energy
def fold_sequences(
    sequences: ty.Iterable[str], temperature: float = 37.0,
) -> pd.DataFrame:
    """Fold every sequence at the given temperature and tabulate the results.

    Parameters
    ----------
    sequences : Iterable[str]
        The sequences to fold. The iterable is consumed exactly once.
    temperature : float
        Value assigned to the ``temperature`` attribute of a fresh
        ``RNA.md()`` model-details object. Defaults to 37.0.

    Returns
    -------
    pd.DataFrame
        A frame indexed by ``sequence`` with two columns,
        ``structure_<T>`` and ``mfe_<T>``, where ``<T>`` is
        ``int(temperature)`` (any fractional part is truncated in the
        column names only, not in the folding itself).
    """
    model = RNA.md()
    model.temperature = temperature

    # Column names depend only on the temperature, so build them once.
    label = int(temperature)
    structure_col = f"structure_{label}"
    mfe_col = f"mfe_{label}"

    columns = {"sequence": [], structure_col: [], mfe_col: []}
    for seq in sequences:
        structure, energy = folded_sequence(seq, model)
        columns["sequence"].append(seq)
        columns[structure_col].append(structure)
        columns[mfe_col].append(energy)

    frame = pd.DataFrame(columns)
    return frame.set_index("sequence")
def fraction(seq: str, nucleoids: str) -> float:
    """Compute the fraction of ``seq`` made up of the given nucleotides.

    Each distinct entry of ``nucleoids`` is counted once, so passing a
    repeated character (e.g. ``"GG"``) does not inflate the result.
    Matching is case-sensitive, as with ``str.count``.

    Parameters
    ----------
    seq : str
        The sequence string.
    nucleoids : str
        The nucleotides to compute the fraction for; iterated element by
        element, so a string is treated as a collection of single
        characters.

    Returns
    -------
    float
        Total occurrences of the distinct nucleotides divided by
        ``len(seq)``, or ``0.0`` when ``seq`` is empty.
    """
    # An empty sequence has no composition; avoid dividing by zero.
    if not seq:
        return 0.0
    # De-duplicate so repeated entries are not counted more than once.
    matches = sum(seq.count(n) for n in set(nucleoids))
    return matches / len(seq)