Spaces:
No application file
No application file
#!/usr/bin/env python | |
# | |
# Restriction Analysis Libraries. | |
# Copyright (C) 2004. Frederic Sohm. | |
# | |
# This code is part of the Biopython distribution and governed by its | |
# license. Please see the LICENSE file that should have been included | |
# as part of this package. | |
# | |
r"""Print the results of restriction enzyme analysis. | |
PrintFormat prints the results from restriction analysis in 3 different
formats: list, number or map.
The easiest way to use it is: | |
>>> from Bio.Restriction.PrintFormat import PrintFormat | |
>>> from Bio.Restriction.Restriction import RestrictionBatch | |
>>> from Bio.Seq import Seq | |
>>> pBs_mcs = Seq('GGTACCGGGCCCCCCCTCGAGGTCGACGGTATCGATAAGCTTGATATCGAATTC') | |
>>> restriction_batch = RestrictionBatch(['EcoRI', 'BamHI', 'ApaI']) | |
>>> result = restriction_batch.search(pBs_mcs) | |
>>> my_map = PrintFormat() | |
>>> my_map.print_that(result, 'My pBluescript mcs analysis:\n', | |
... 'No site:\n') | |
My pBluescript mcs analysis: | |
ApaI : 12. | |
EcoRI : 50. | |
No site: | |
BamHI | |
<BLANKLINE> | |
>>> my_map.sequence = pBs_mcs | |
>>> my_map.print_as("map") | |
>>> my_map.print_that(result) | |
12 ApaI | |
| | |
| 50 EcoRI | |
| | | |
GGTACCGGGCCCCCCCTCGAGGTCGACGGTATCGATAAGCTTGATATCGAATTC | |
|||||||||||||||||||||||||||||||||||||||||||||||||||||| | |
CCATGGCCCGGGGGGGAGCTCCAGCTGCCATAGCTATTCGAACTATAGCTTAAG | |
1 54 | |
<BLANKLINE> | |
<BLANKLINE> | |
Enzymes which do not cut the sequence. | |
<BLANKLINE> | |
BamHI | |
<BLANKLINE> | |
>>> | |
Some of the methods of PrintFormat are meant to be overridden by derived
classes.
Use the following parameters to control the appearance: | |
- ConsoleWidth : width of the console used default to 80. | |
should never be less than 60. | |
- NameWidth : space attributed to the name in PrintList method. | |
- Indent : Indent of the second line. | |
- MaxSize : Maximal size of the sequence (default=6,
i.e. up to 99 999 bp plus 1 trailing ',').
people are unlikely to ask for restriction map of sequences | |
bigger than 100.000 bp. This is needed to determine the | |
space to be reserved for sites location. | |
- MaxSize = 5 => 9.999 bp | |
- MaxSize = 6 => 99.999 bp | |
- MaxSize = 7 => 999.999 bp | |
Example output:: | |
<------------ ConsoleWidth ---------------> | |
<- NameWidth -> | |
EcoRI : 1, 45, 50, 300, 400, 650, | |
700, 1200, 2500. | |
<--> | |
Indent | |
""" # noqa: W291 | |
import re | |
class PrintFormat:
    """Format the results of a restriction analysis as text.

    Three layouts are available, selected with ``print_as``: an alphabetical
    list, a list grouped by number of cuts, and a 60-column map drawn above
    the sequence. The map layout reads ``self.sequence``, which the caller
    must set before formatting.

    Class attributes controlling the layout:
     - ConsoleWidth : width of the console.
     - NameWidth : width of the column holding the enzyme name.
     - MaxSize : width reserved for one site position in wrapped lists.
     - Indent : extra indentation of continuation lines.
     - Cmodulo, PrefWidth, linesize : derived from the values above.
    """

    ConsoleWidth = 80
    NameWidth = 10
    MaxSize = 6
    # The three derived values below are computed once, when the class body
    # is executed; changing ConsoleWidth/NameWidth afterwards does not
    # update them automatically.
    Cmodulo = ConsoleWidth % NameWidth
    PrefWidth = ConsoleWidth - Cmodulo
    Indent = 4
    linesize = PrefWidth - NameWidth

    def print_as(self, what="list"):
        """Select the layout used by ``format_output`` and ``print_that``.

        Valid formats are:
         - 'list'   -> alphabetical order
         - 'number' -> grouped by number of sites in the sequence
         - 'map'    -> a map representation of the sequence with the sites.

        Any other value selects the 'list' layout.

        If you want more flexibility, override the method make_format.
        """
        if what == "map":
            self.make_format = self._make_map
        elif what == "number":
            self.make_format = self._make_number
        else:
            self.make_format = self._make_list

    def format_output(self, dct, title="", s1=""):
        """Summarise results as a nicely formatted string.

        Arguments:
         - dct is a dictionary mapping each enzyme to its list of cut
           positions. When it is empty or None, ``self.results`` is used if
           the instance defines it; otherwise an empty result is formatted.
         - title is the title of the output.
           It must be a formatted string, i.e. you must include the line
           break.
         - s1 is the title separating the list of enzymes that have sites
           from those without sites.
           It must be a formatted string as well.

        Returns the string built by the currently selected ``make_format``.
        """
        if not dct:
            # A bare PrintFormat has no ``results`` attribute; fall back to
            # an empty mapping instead of raising AttributeError.
            dct = getattr(self, "results", None) or {}
        cutters = [(enzyme, sites) for enzyme, sites in dct.items() if sites]
        non_cutters = [enzyme for enzyme, sites in dct.items() if not sites]
        return self.make_format(cutters, title, non_cutters, s1)

    def print_that(self, dct, title="", s1=""):
        """Print the output of the format_output method (OBSOLETE).

        Arguments:
         - dct is a dictionary mapping each enzyme to its cut positions.
         - title is the title of the output.
           It must be a formatted string, i.e. you must include the line
           break.
         - s1 is the title separating the list of enzymes that have sites
           from those without sites.
           It must be a formatted string as well.

        This method prints the output of format_output() and is kept for
        backwards compatibility.
        """
        print(self.format_output(dct, title, s1))

    def make_format(self, cut=(), title="", nc=(), s1=""):
        """Format the results; hook meant to be replaced.

        ``print_as`` rebinds this name on the instance to one of the
        _make_* methods. You can also point make_format to a method of your
        own with the same signature. The default is the list layout.
        """
        return self._make_list(cut, title, nc, s1)

    # _make_* methods to be used with the virtual method make_format

    def _make_list(self, ls, title, nc, s1):
        """Summarise a list of positions by enzyme (PRIVATE).

        Return a string of form::

            title.

            enzyme1     :   position1, position2.
            enzyme2     :   position1, position2, position3.

        Arguments:
         - ls is a tuple or list of (enzyme, positions) pairs.
         - title is the title.
         - nc is a tuple or list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return self._make_list_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_map(self, ls, title, nc, s1):
        """Summarise mapping information as a string (PRIVATE).

        Return a string of form::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        Arguments:
         - ls is a list of (enzyme, positions) pairs.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return self._make_map_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_number(self, ls, title, nc, s1):
        """Format cutting position information as a string (PRIVATE).

        Returns a string in the form::

            title.

            enzyme which cut 1 time:

            enzyme1     :   position1.

            enzyme which cut 2 times:

            enzyme2     :   position1, position2.
            ...

        Arguments:
         - ls is a list of (enzyme, positions) pairs.
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return self._make_number_only(ls, title) + self._make_nocut_only(nc, s1)

    def _make_nocut(self, ls, title, nc, s1):
        """Summarise non-cutting enzymes (PRIVATE).

        Return the title followed by the formatted non cutting enzymes.

        ls is accepted only for signature compatibility with make_format
        and is not used.

        Arguments:
         - title is the title.
         - nc is a list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes.
        """
        return title + self._make_nocut_only(nc, s1)

    def _make_nocut_only(self, nc, s1, ls=(), title=""):
        """Summarise non-cutting enzymes (PRIVATE).

        Return a formatted string of the non cutting enzymes: the heading
        followed by the sorted names, each left-justified to NameWidth, with
        a line break once a row grows beyond ``linesize`` characters.

        Arguments:
         - nc is a tuple or list of non cutting enzymes.
         - s1 is the sentence before the non cutting enzymes; a default
           heading is used when it is empty.
         - ls and title are unused; they keep the signature parallel to the
           other _make_*_only methods.

        When nc is empty, s1 is returned unchanged.
        """
        if not nc:
            return s1
        pieces = [s1 or "\n Enzymes which do not cut the sequence.\n\n"]
        row = ""
        for key in sorted(nc):
            row += str(key).ljust(self.NameWidth)
            if len(row) > self.linesize:
                pieces.append(row + "\n")
                row = ""
        # The last (possibly empty) row is always terminated.
        pieces.append(row + "\n")
        return "".join(pieces)

    def _make_list_only(self, ls, title, nc=(), s1=""):
        """Summarise list of positions per enzyme (PRIVATE).

        Return a string of form::

            title.

            enzyme1     :   position1, position2.
            enzyme2     :   position1, position2, position3.
            ...

        Arguments:
         - ls is a tuple or list of (enzyme, positions) pairs.
         - title is a string.
         - nc and s1 are unused; non cutting enzymes are not included.
        """
        if not ls:
            return title
        return self.__next_section(ls, title)

    def _make_number_only(self, ls, title, nc=(), s1=""):
        """Summarise number of cuts as a string (PRIVATE).

        Return a string of form::

            title.

            enzyme which cut 1 time:

            enzyme1     :   position1.

            enzyme which cut 2 times:

            enzyme2     :   position1, position2.
            ...

        Sections appear in increasing order of cut count and only for counts
        that actually occur. ``ls`` is not modified.

        Arguments:
         - ls is a list of (enzyme, positions) pairs.
         - title is a string.
         - nc and s1 are unused; non cutting enzymes are not included.
        """
        if not ls:
            return title
        # Group by number of sites without sorting the caller's list.
        by_count = {}
        for name, sites in ls:
            by_count.setdefault(len(sites), []).append((name, sites))
        for count in sorted(by_count):
            title += "\n\nenzymes which cut %i times :\n\n" % count
            title = self.__next_section(by_count[count], title)
        return title

    @staticmethod
    def _map_site_lines(keys, row_start, row_end, enzymemap):
        """Draw the site labels above one row of the map (PRIVATE).

        Arguments:
         - keys are the sorted cut positions belonging to this row.
         - row_start is the number of bases preceding the row.
         - row_end is the position of the last base of the row.
         - enzymemap maps a cut position to the list of enzyme names.

        Each position yields two lines: its number and enzyme names, then a
        line of '|' markers for this and all previous positions of the row.
        A position equal to row_end is drawn without trailing padding and
        ends the row.
        """
        ruler = " " * 60
        lines = []
        for key in keys:
            label = "".join(" " + name for name in enzymemap[key])
            if key == row_end:
                # key % 60 would be 0 for a full row, so the last column is
                # computed from the row bounds instead.
                head = ruler[: row_end - row_start - 1]
                lines.append(head + str(key) + label + "\n")
                lines.append(head + "|\n")
                break
            column = key % 60
            head = ruler[: column - 1]
            lines.append(head + str(key) + label + "\n")
            ruler = head + "|" + ruler[column:]
            lines.append(ruler + "\n")
        return "".join(lines)

    def _make_map_only(self, ls, title, nc=(), s1=""):
        """Make string describing cutting map (PRIVATE).

        Return a string of form::

            | title.
            |
            |     enzyme1, position
            |     |
            | AAAAAAAAAAAAAAAAAAAAA...
            | |||||||||||||||||||||
            | TTTTTTTTTTTTTTTTTTTTT...

        The sequence is read from ``self.sequence`` and drawn in rows of 60
        bases, each followed by its complement and the positions of its
        first and last base.

        Arguments:
         - ls is a list of (enzyme, positions) pairs.
         - title is a string.
         - nc and s1 are unused; non cutting enzymes are not included.
        """
        if not ls:
            return title
        # position -> names of the enzymes cutting there
        enzymemap = {}
        for enzyme, cut in ls:
            for position in cut:
                enzymemap.setdefault(position, []).append(str(enzyme))
        length = len(self.sequence)
        sequence = str(self.sequence)
        revsequence = str(
            self.sequence.complement(inplace=False)
        )  # TODO: remove inplace=False
        # Full rows end on multiples of 60 strictly below the length; the
        # final row holds whatever is left (1 to 60 bases, or 0 if empty).
        breaks = list(range(60, length, 60))
        rows = [(stop - 60, stop) for stop in breaks]
        rows.append((breaks[-1] if breaks else 0, length))
        pending = sorted(enzymemap)
        pieces = [title or ""]
        final_row = len(rows) - 1
        for index, (start, stop) in enumerate(rows):
            if index == final_row:
                row_keys = pending
            else:
                # pending is sorted, so the keys of this row form a prefix.
                row_keys = [key for key in pending if key <= stop]
                pending = pending[len(row_keys):]
            pieces.append(self._map_site_lines(row_keys, start, stop, enzymemap))
            width = stop - start
            pieces.append(sequence[start:stop] + "\n")
            pieces.append("|" * width + "\n")
            pieces.append(revsequence[start:stop] + "\n")
            pieces.append(
                str(start + 1).ljust(15)
                + " " * (width - 30)
                + str(stop).rjust(15)
                + "\n\n"
            )
        return "".join(pieces)

    # private method to do lists:

    def __next_section(self, ls, into):
        """Next section (PRIVATE).

        Arguments:
         - ls is a tuple/list of tuple (string, [int, int]).
         - into is a string to which the formatted ls will be added.

        Format ls, sorted, as a string of lines of the form::

            enzyme1     :   position1.
            enzyme2     :   position2, position3.

        Position lists longer than ``linesize - MaxSize`` characters are
        wrapped, continuation lines being indented by NameWidth + Indent
        spaces. Return ``into`` followed by the formatted lines.
        """
        indentation = "\n" + (self.NameWidth + self.Indent) * " "
        linesize = self.linesize - self.MaxSize
        pat = re.compile(r"([\w,\s()]){1,%i}[,\.]" % linesize)
        rows = [into]
        for name, sites in sorted(ls):
            output = ", ".join(str(site) for site in sites) + "."
            if len(output) > linesize:
                # cut where appropriate and add the indentation
                output = indentation.join(m.group() for m in pat.finditer(output))
            rows.append(str(name).ljust(self.NameWidth) + " : " + output + "\n")
        return "".join(rows)