Spaces:
No application file
No application file
# Copyright 2004 by Bob Bussell. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Tools to manipulate data from nmrview .xpk peaklist files."""
# Presumably the number of header lines at the top of an .xpk file (the
# Peaklist class reads six of them). NOTE(review): the constant is not
# referenced by the code visible in this module -- confirm before use.
HEADERLEN = 6
class XpkEntry:
    """Dictionary-style view of one data line of an nmrview .xpk file.

    A single non-header line is split on whitespace and every value is
    stored under the column name taken from the label line of the
    peaklist header.

    Parameters
    ----------
    entry : str
        One data line from an nmrview .xpk file.
    headline : str
        The header line that names the columns. This is typically the
        sixth line of the header, 1-origin.

    Attributes
    ----------
    fields : dict
        Maps each column label to the matching value of the line; all
        values are kept as strings. ``self.fields["H1.P"]`` returns the
        H1.P entry, for example, and ``self.fields["entrynum"]`` returns
        the first item of the line, which has no label in the header.

    """

    def __init__(self, entry, headline):
        """Split the data line and pair its values with the header labels."""
        values = entry.split()
        labels = headline.split()
        # The leading value is the entry number and has no header label,
        # so the labels line up with the values that follow it.
        self.fields = {}
        for label, value in zip(labels, values[1:]):
            self.fields[label] = value
        # An empty line has no entry number; leave the key out in that case.
        if values:
            self.fields["entrynum"] = values[0]
class Peaklist:
    """Provide access to header lines and data from a nmrview xpk file.

    The six header lines and the data lines that follow them are read when
    the object is created and are available as attributes.

    Parameters
    ----------
    infn : str
        The input nmrview filename.

    Attributes
    ----------
    firstline : str
        The first line in the header.
    axislabels : str
        The axis labels.
    dataset : str
        The label of the dataset.
    sw : str
        The sw coordinates.
    sf : str
        The sf coordinates.
    datalabels : str
        The labels of the entries.
    data : list
        File data after header lines, one string per line with the
        trailing newline removed.

    Examples
    --------
    >>> from Bio.NMR.xpktools import Peaklist
    >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
    >>> peaklist.firstline
    'label dataset sw sf '
    >>> peaklist.dataset
    'test.nv'
    >>> peaklist.sf
    '{599.8230 } { 60.7860 } { 60.7860 }'
    >>> peaklist.datalabels
    ' H1.L H1.P H1.W H1.B H1.E H1.J 15N2.L 15N2.P 15N2.W 15N2.B 15N2.E 15N2.J N15.L N15.P N15.W N15.B N15.E N15.J vol int stat '

    """

    def __init__(self, infn):
        """Read the header lines and the data lines from file ``infn``."""
        with open(infn) as infile:
            # Read in the six header lines. readline() returns "" at end of
            # file, so a short file simply leaves later attributes empty.
            self.firstline = infile.readline().rstrip("\n")
            self.axislabels = infile.readline().rstrip("\n")
            self.dataset = infile.readline().rstrip("\n")
            self.sw = infile.readline().rstrip("\n")
            self.sf = infile.readline().rstrip("\n")
            self.datalabels = infile.readline().rstrip("\n")
            # Everything after the header is kept verbatim, one line per item
            self.data = [line.rstrip("\n") for line in infile]

    def residue_dict(self, index):
        """Return a dict of lines in 'data' indexed by residue number.

        The residue number is taken from the ``<index>.L`` column of each
        data line: the text before the first "." of that value is converted
        to an integer. The nucleus should be given in the same form as it
        appears in the xpk label line (H1, 15N for example).

        Blank data lines are ignored. The result is also stored on the
        instance as ``self.dict``.

        Parameters
        ----------
        index : str
            The nucleus to index data by.

        Returns
        -------
        resdict : dict
            Maps each residue number (as a string) to the list of data
            lines for that residue. Two extra keys, ``"maxres"`` and
            ``"minres"``, hold the largest and smallest residue number as
            integers; both are -1 when there are no data lines.

        Raises
        ------
        KeyError
            If a non-blank data line has no value for the ``<index>.L``
            column.
        ValueError
            If the residue part of the label is not an integer.

        Examples
        --------
        >>> from Bio.NMR.xpktools import Peaklist
        >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
        >>> residue_d = peaklist.residue_dict('H1')
        >>> sorted(residue_d.keys())
        ['10', '3', '4', '5', '6', '7', '8', '9', 'maxres', 'minres']
        >>> residue_d['10']
        ['8 10.hn 7.663 0.021 0.010 ++ 0.000 10.n 118.341 0.324 0.010 +E 0.000 10.n 118.476 0.324 0.010 +E 0.000 0.49840 0.49840 0']

        """
        # None marks "nothing seen yet". A numeric sentinel such as -1
        # would be confused with a genuine residue number of -1.
        maxres = None
        minres = None
        label = index + ".L"
        self.dict = {}
        for line in self.data:
            if not line.strip():
                # A blank line (e.g. at the end of the file) carries no peak
                continue
            # Cast the data line into the XpkEntry class
            ind = XpkEntry(line, self.datalabels).fields[label]
            res = int(ind.split(".")[0])
            if maxres is None or res > maxres:
                maxres = res
            if minres is None or res < minres:
                minres = res
            # Lines for the same residue accumulate under the same key
            self.dict.setdefault(str(res), []).append(line)
        # Keep the historical -1 value for an empty peaklist
        self.dict["maxres"] = -1 if maxres is None else maxres
        self.dict["minres"] = -1 if minres is None else minres
        return self.dict

    def write_header(self, outfn):
        """Write the six header lines read from the input to file ``outfn``.

        The file named ``outfn`` is opened for writing (replacing any
        existing content) and each header line is written followed by a
        newline.
        """
        header = (
            self.firstline,
            self.axislabels,
            self.dataset,
            self.sw,
            self.sf,
            self.datalabels,
        )
        with open(outfn, "w") as outfile:
            outfile.writelines(line + "\n" for line in header)
def replace_entry(line, fieldn, newentry):
    """Replace an entry in a string by the field number.

    No padding is implemented currently. Spacing will change if
    the original field entry and the new field entry are of
    different lengths.

    Parameters
    ----------
    line : str
        The whitespace delimited line to edit.
    fieldn : int
        Number of the field to replace, counted starting with 1.
        NOTE: the value is not checked against the number of fields
        actually present in ``line``.
    newentry : object
        The replacement; it is converted with ``str()``.

    Returns
    -------
    newline : str
        A copy of ``line`` with the field replaced.

    Raises
    ------
    IndexError
        If there is no field at or after the located position.

    """
    # This method depends on xpktools._find_start_entry
    start = _find_start_entry(line, fieldn)
    # The offset may point at whitespace in front of the field (field 1 is
    # reported at offset 0 even when the line starts with spaces). Move on
    # to the first character of the field so that exactly the field text,
    # and not the whitespace before it, is replaced.
    tail = line[start:]
    field_text = tail.lstrip()
    start += len(tail) - len(field_text)
    if not field_text:
        raise IndexError("line has no field %r" % (fieldn,))
    leng = len(field_text.split(None, 1)[0])
    return line[:start] + str(newentry) + line[start + leng :]
def _find_start_entry(line, n): | |
"""Find the starting character for entry ``n`` in a space delimited ``line`` (PRIVATE). | |
n is counted starting with 1. | |
The n=1 field by definition begins at the first character. | |
Returns | |
------- | |
starting character : str | |
The starting character for entry ``n``. | |
""" | |
# This function is used by replace_entry | |
if n == 1: | |
return 0 # Special case | |
# Count the number of fields by counting spaces | |
c = 1 | |
leng = len(line) | |
# Initialize variables according to whether the first character | |
# is a space or a character | |
if line[0] == " ": | |
infield = False | |
field = 0 | |
else: | |
infield = True | |
field = 1 | |
while c < leng and field < n: | |
if infield: | |
if line[c] == " " and line[c - 1] != " ": | |
infield = False | |
else: | |
if line[c] != " ": | |
infield = True | |
field += 1 | |
c += 1 | |
return c - 1 | |
def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input xpk files.

    Every file is read and indexed by the residue number of ``keyatom``.
    The table has one row for each residue number between the smallest and
    the largest found in any of the files. A row is a tab separated string
    ending in a newline: the residue number, then one column per input
    file holding the ``datalabel`` value of the first data line for that
    residue, or ``*`` when the file has no line for it.

    Parameters
    ----------
    fn_list : list
        List of .xpk file names.
    datalabel : str
        The data element reported.
    keyatom : str
        The name of the nucleus used as an index for the data table.

    Returns
    -------
    outlist : list
        List of table rows indexed by ``keyatom``. Empty when ``fn_list``
        is empty.

    """
    # TODO - Clarify this docstring, add an example?
    fn_list = list(fn_list)
    if not fn_list:
        # Nothing to read, so there is no residue range to tabulate
        return []

    dict_list, label_line_list = _read_dicts(fn_list, keyatom)

    # Find global max and min residue numbers
    minr = min(dictionary["minres"] for dictionary in dict_list)
    maxr = max(dictionary["maxres"] for dictionary in dict_list)

    outlist = []
    for res in range(minr, maxr + 1):  # s.t. res numbers
        key = str(res)
        cells = [key]
        # Each residue dictionary is paired with the label line of its file
        for dictionary, label in zip(dict_list, label_line_list):
            if key in dictionary:
                # Only the first data line of the residue is reported
                cells.append(XpkEntry(dictionary[key][0], label).fields[datalabel])
            else:
                cells.append("*")
        outlist.append("\t".join(cells) + "\n")
    return outlist
def _read_dicts(fn_list, keyatom): | |
"""Read multiple files into a list of residue dictionaries (PRIVATE).""" | |
dict_list = [] | |
datalabel_list = [] | |
for fn in fn_list: | |
peaklist = Peaklist(fn) | |
dictionary = peaklist.residue_dict(keyatom) | |
dict_list.append(dictionary) | |
datalabel_list.append(peaklist.datalabels) | |
return [dict_list, datalabel_list] | |
# Script entry point: only runs when the module is executed directly.
if __name__ == "__main__":
    # Imported here so the helper is only needed for direct execution.
    # Presumably it runs the examples in this module's docstrings.
    from Bio._utils import run_doctest
    run_doctest()