Spaces:
No application file
No application file
File size: 4,121 Bytes
b7731cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# Copyright 2005 by Jonathan Taylor.
# All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Cleaved amplified polymorphic sequence (CAPS) markers.
A CAPS marker is a location a DifferentialCutsite as described below and a
set of primers that can be used to visualize this. More information can
be found in the paper `Konieczny and Ausubel (1993)`_ (PMID 8106085).
.. _`Konieczny and Ausubel (1993)`: https://doi.org/10.1046/j.1365-313X.1993.04020403.x
"""
from Bio.Align import MultipleSeqAlignment
from Bio.Seq import Seq
class DifferentialCutsite:
"""Differential enzyme cutsite in an alignment.
A differential cutsite is a location in an alignment where an enzyme cuts
at least one sequence and also cannot cut at least one other sequence.
Members:
- start - Where it lives in the alignment.
- enzyme - The enzyme that causes this.
- cuts_in - A list of sequences (as indexes into the alignment) the
enzyme cuts in.
- blocked_in - A list of sequences (as indexes into the alignment) the
enzyme is blocked in.
"""
def __init__(self, **kwds):
"""Initialize a DifferentialCutsite.
Each member (as listed in the class description) should be included as a
keyword.
"""
self.start = int(kwds["start"])
self.enzyme = kwds["enzyme"]
self.cuts_in = kwds["cuts_in"]
self.blocked_in = kwds["blocked_in"]
class AlignmentHasDifferentLengthsError(Exception):
"""Exception where sequences in alignment have different lengths."""
pass
class CAPSMap:
"""A map of an alignment showing all possible dcuts.
Members:
- alignment - The alignment that is mapped.
- dcuts - A list of possible CAPS markers in the form of
DifferentialCutsites.
"""
def __init__(self, alignment, enzymes=None):
"""Initialize the CAPSMap.
Required:
- alignment - The alignment to be mapped.
Optional:
- enzymes - List of enzymes to be used to create the map.
Defaults to an empty list.
"""
if enzymes is None:
enzymes = []
if isinstance(alignment, MultipleSeqAlignment):
self.sequences = [rec.seq for rec in alignment]
self.size = len(self.sequences)
self.length = len(self.sequences[0])
for seq in self.sequences:
if len(seq) != self.length:
raise AlignmentHasDifferentLengthsError
else: # Alignment object
self.sequences = [Seq(s) for s in alignment]
self.size, self.length = alignment.shape
self.alignment = alignment
self.enzymes = enzymes
# look for dcuts
self._digest()
def _digest_with(self, enzyme):
cuts = [] # list of lists, one per sequence
all_seq_cuts = []
# go through each sequence
for seq in self.sequences:
# grab all the cuts in the sequence
seq_cuts = [cut - enzyme.fst5 for cut in enzyme.search(seq)]
# maintain a list of all cuts in all sequences
all_seq_cuts.extend(seq_cuts)
cuts.append(seq_cuts)
# we sort the all list and remove duplicates
all_seq_cuts = sorted(set(all_seq_cuts))
for cut in all_seq_cuts:
# test for dcuts
cuts_in = []
blocked_in = []
for i, seq in enumerate(self.sequences):
if cut in cuts[i]:
cuts_in.append(i)
else:
blocked_in.append(i)
if cuts_in and blocked_in:
self.dcuts.append(
DifferentialCutsite(
start=cut, enzyme=enzyme, cuts_in=cuts_in, blocked_in=blocked_in
)
)
def _digest(self):
self.dcuts = []
for enzyme in self.enzymes:
self._digest_with(enzyme)
|