# aakash0017's picture
# Upload folder using huggingface_hub
# b7731cd
# Copyright 2004 by Bob Bussell. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Tools to manipulate data from nmrview .xpk peaklist files."""
# Presumably the number of header lines in an nmrview .xpk file:
# Peaklist.__init__ reads six header lines before the peak data.
HEADERLEN = 6
class XpkEntry:
    """Dictionary-style view of one data line from an nmrview .xpk file.

    A data (non-header) line is split on whitespace.  Its first token is
    the peak number; the remaining tokens are paired, in order, with the
    labels found in the last line of the peaklist header.

    Parameters
    ----------
    entry : str
        A data line from an nmrview .xpk file.
    headline : str
        The header line that names the entries.  This is typically the
        sixth line of the header, 1-origin.

    Attributes
    ----------
    fields : dict
        Maps each header label to the matching token of the data line,
        e.g. ``self.fields["H1.P"]`` is the H1.P entry.  The extra key
        ``"entrynum"`` holds the first token of the line (the peak
        number); it is absent when the line holds no tokens at all.

    """

    def __init__(self, entry, headline):
        """Split the data line and pair its tokens with the header labels."""
        tokens = entry.split()
        labels = headline.split()

        # Token 0 is the peak number, so the labels line up with tokens 1..n.
        # zip() stops at the shorter sequence, silently dropping any surplus.
        parsed = {}
        for label, token in zip(labels, tokens[1:]):
            parsed[label] = token

        # A blank line has no peak number to record.
        if tokens:
            parsed["entrynum"] = tokens[0]

        self.fields = parsed
class Peaklist:
    """Provide access to header lines and data from a nmrview xpk file.

    Header file lines and file data are available as attributes.

    Parameters
    ----------
    infn : str
        The input nmrview filename.

    Attributes
    ----------
    firstline : str
        The first line in the header.
    axislabels : str
        The axis labels.
    dataset : str
        The label of the dataset.
    sw : str
        The sw coordinates.
    sf : str
        The sf coordinates.
    datalabels : str
        The labels of the entries.
    data : list
        File data after header lines, one string per line with the
        trailing newline removed.

    Examples
    --------
    >>> from Bio.NMR.xpktools import Peaklist
    >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
    >>> peaklist.firstline
    'label dataset sw sf '
    >>> peaklist.dataset
    'test.nv'
    >>> peaklist.sf
    '{599.8230 } { 60.7860 } { 60.7860 }'
    >>> peaklist.datalabels
    ' H1.L H1.P H1.W H1.B H1.E H1.J 15N2.L 15N2.P 15N2.W 15N2.B 15N2.E 15N2.J N15.L N15.P N15.W N15.B N15.E N15.J vol int stat '

    """

    def __init__(self, infn):
        """Read the six header lines and all data lines from ``infn``."""
        with open(infn) as infile:
            # The header lines come first, in this fixed order.  Only the
            # line terminator is removed; other whitespace is preserved.
            self.firstline = infile.readline().rstrip("\n")
            self.axislabels = infile.readline().rstrip("\n")
            self.dataset = infile.readline().rstrip("\n")
            self.sw = infile.readline().rstrip("\n")
            self.sf = infile.readline().rstrip("\n")
            self.datalabels = infile.readline().rstrip("\n")

            # Everything after the header is peak data
            self.data = [line.rstrip("\n") for line in infile]

    def residue_dict(self, index):
        """Return a dict of lines in 'data' indexed by residue number or a nucleus.

        The nucleus should be given as the input argument in the same form as
        it appears in the xpk label line (H1, 15N for example)

        Parameters
        ----------
        index : str
            The nucleus to index data by.

        Returns
        -------
        resdict : dict
            Maps each residue number (as a string) to the list of data
            lines assigned to it.  The extra keys ``"maxres"`` and
            ``"minres"`` hold the largest and smallest residue number as
            integers; both are -1 when there are no data lines.  The same
            dictionary is also stored as ``self.dict``.

        Raises
        ------
        KeyError
            If a data line has no ``index + ".L"`` field.
        ValueError
            If the residue part of an assignment is not an integer.

        Examples
        --------
        >>> from Bio.NMR.xpktools import Peaklist
        >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
        >>> residue_d = peaklist.residue_dict('H1')
        >>> sorted(residue_d.keys())
        ['10', '3', '4', '5', '6', '7', '8', '9', 'maxres', 'minres']
        >>> residue_d['10']
        ['8 10.hn 7.663 0.021 0.010 ++ 0.000 10.n 118.341 0.324 0.010 +E 0.000 10.n 118.476 0.324 0.010 +E 0.000 0.49840 0.49840 0']

        """
        # None (not -1) marks "nothing seen yet", so that a genuine residue
        # number of -1 cannot be mistaken for the unset state.
        maxres = None
        minres = None
        label = index + ".L"

        self.dict = {}
        for line in self.data:
            if not line.strip():
                # Blank lines (e.g. at the end of the file) carry no peak
                continue
            assignment = XpkEntry(line, self.datalabels).fields[label]
            # Assignments look like "10.hn": residue number, dot, atom name
            res = int(assignment.split(".")[0])
            if maxres is None or res > maxres:
                maxres = res
            if minres is None or res < minres:
                minres = res
            # Several peaks may share a residue; collect them under one key
            self.dict.setdefault(str(res), []).append(line)

        # Keep reporting -1 for an empty peak list, as callers may expect
        self.dict["maxres"] = -1 if maxres is None else maxres
        self.dict["minres"] = -1 if minres is None else minres
        return self.dict

    def write_header(self, outfn):
        """Write the six header lines to the file named ``outfn``.

        The file is opened in write mode, so existing content is replaced.
        """
        header = (
            self.firstline,
            self.axislabels,
            self.dataset,
            self.sw,
            self.sf,
            self.datalabels,
        )
        with open(outfn, "w") as outfile:
            outfile.writelines(line + "\n" for line in header)
def replace_entry(line, fieldn, newentry):
"""Replace an entry in a string by the field number.
No padding is implemented currently. Spacing will change if
the original field entry and the new field entry are of
different lengths.
"""
# This method depends on xpktools._find_start_entry
start = _find_start_entry(line, fieldn)
leng = len(line[start:].split()[0])
newline = line[:start] + str(newentry) + line[(start + leng) :]
return newline
def _find_start_entry(line, n):
"""Find the starting character for entry ``n`` in a space delimited ``line`` (PRIVATE).
n is counted starting with 1.
The n=1 field by definition begins at the first character.
Returns
-------
starting character : str
The starting character for entry ``n``.
"""
# This function is used by replace_entry
if n == 1:
return 0 # Special case
# Count the number of fields by counting spaces
c = 1
leng = len(line)
# Initialize variables according to whether the first character
# is a space or a character
if line[0] == " ":
infield = False
field = 0
else:
infield = True
field = 1
while c < leng and field < n:
if infield:
if line[c] == " " and line[c - 1] != " ":
infield = False
else:
if line[c] != " ":
infield = True
field += 1
c += 1
return c - 1
def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input xpk files.

    The table has one row for every residue number between the smallest
    and the largest one found in any of the files.  Each row is a string
    made of the residue number followed by one tab separated column per
    input file, terminated by a newline.  A column holds the
    ``datalabel`` value of the first peak assigned to that residue in
    the file, or ``*`` if the file has no peak for the residue.

    Parameters
    ----------
    fn_list : list
        List of .xpk file names.
    datalabel : str
        The data element reported.
    keyatom : str
        The name of the nucleus used as an index for the data table.

    Returns
    -------
    outlist : list
        List of table rows indexed by ``keyatom``.  Empty if ``fn_list``
        is empty.

    """
    if not fn_list:
        # No files means no residue range to tabulate
        return []

    dict_list, label_line_list = _read_dicts(fn_list, keyatom)

    # Find global max and min residue numbers
    minr = min(dictionary["minres"] for dictionary in dict_list)
    maxr = max(dictionary["maxres"] for dictionary in dict_list)

    outlist = []
    for res in range(minr, maxr + 1):
        key = str(res)
        columns = [key]
        # Each file is parsed with its own label line
        for dictionary, label in zip(dict_list, label_line_list):
            if key in dictionary:
                # Only the first peak listed for the residue is reported
                columns.append(XpkEntry(dictionary[key][0], label).fields[datalabel])
            else:
                columns.append("*")
        outlist.append("\t".join(columns) + "\n")
    return outlist
def _read_dicts(fn_list, keyatom):
"""Read multiple files into a list of residue dictionaries (PRIVATE)."""
dict_list = []
datalabel_list = []
for fn in fn_list:
peaklist = Peaklist(fn)
dictionary = peaklist.residue_dict(keyatom)
dict_list.append(dictionary)
datalabel_list.append(peaklist.datalabels)
return [dict_list, datalabel_list]
# Script entry point: call the run_doctest helper imported from Bio._utils
# (defined outside this file; presumably it runs the docstring examples above).
if __name__ == "__main__":
    from Bio._utils import run_doctest

    run_doctest()