Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

App Files Files Community

DrVai-Rag-Testing / myenv /lib /python3.10 /site-packages /Bio /NMR /xpktools.py

aakash0017

Upload folder using huggingface_hub

b7731cd over 1 year ago

raw

history blame contribute delete

9.28 kB

	# Copyright 2004 by Bob Bussell. All rights reserved.
	# This code is part of the Biopython distribution and governed by its
	# license. Please see the LICENSE file that should have been included
	# as part of this package.
	"""Tools to manipulate data from nmrview .xpk peaklist files."""


	# Presumably the number of header lines that precede the data lines of an
	# nmrview .xpk file; the constant is not referenced by the code visible in
	# this module -- TODO confirm before relying on it.
	HEADERLEN = 6


	class XpkEntry:
	    """Dictionary-style view of one data line of an nmrview .xpk peaklist.

	    A single non-header line is split on whitespace and every value is
	    stored under the matching label taken from the label line of the
	    peaklist header.

	    Parameters
	    ----------
	    entry : str
	        One data line from an nmrview .xpk file.
	    headline : str
	        The header line that names the entries. This is typically the
	        sixth line of the header, 1-origin.

	    Attributes
	    ----------
	    fields : dict
	        Maps each label of the header line to the corresponding value of
	        the data line, so ``self.fields["H1.P"]`` is the H1.P entry.
	        ``self.fields["entrynum"]`` holds the first token of the line
	        (its entry number) and is only present for a non-empty line.

	    """

	    def __init__(self, entry, headline):
	        """Initialize the class."""
	        values = entry.split()
	        labels = headline.split()

	        # The first token is the entry number and has no label of its own,
	        # so the labels are paired with the tokens that follow it.
	        self.fields = dict(zip(labels, values[1:]))

	        # A blank line has no entry number to record.
	        if values:
	            self.fields["entrynum"] = values[0]


	class Peaklist:
	    """Provide access to header lines and data from a nmrview xpk file.

	    Header file lines and file data are available as attributes.

	    Parameters
	    ----------
	    infn : str
	        The input nmrview filename.

	    Attributes
	    ----------
	    firstline : str
	        The first line in the header.
	    axislabels : str
	        The axis labels.
	    dataset : str
	        The label of the dataset.
	    sw : str
	        The sw coordinates.
	    sf : str
	        The sf coordinates.
	    datalabels : str
	        The labels of the entries.
	    data : list
	        File data after header lines, one string per line with the
	        trailing newline removed.
	    dict : dict
	        The result of the most recent ``residue_dict`` call; only set
	        once that method has been called.

	    Examples
	    --------
	    >>> from Bio.NMR.xpktools import Peaklist
	    >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
	    >>> peaklist.firstline
	    'label dataset sw sf '
	    >>> peaklist.dataset
	    'test.nv'
	    >>> peaklist.sf
	    '{599.8230 } { 60.7860 } { 60.7860 }'
	    >>> peaklist.datalabels
	    ' H1.L H1.P H1.W H1.B H1.E H1.J 15N2.L 15N2.P 15N2.W 15N2.B 15N2.E 15N2.J N15.L N15.P N15.W N15.B N15.E N15.J vol int stat '

	    """

	    def __init__(self, infn):
	        """Initialize the class."""
	        with open(infn) as infile:
	            # The first six lines are the header, in this fixed order.
	            self.firstline = infile.readline().rstrip("\n")
	            self.axislabels = infile.readline().rstrip("\n")
	            self.dataset = infile.readline().rstrip("\n")
	            self.sw = infile.readline().rstrip("\n")
	            self.sf = infile.readline().rstrip("\n")
	            self.datalabels = infile.readline().rstrip("\n")

	            # Everything after the header is data, kept as raw lines
	            self.data = [line.rstrip("\n") for line in infile]

	    def residue_dict(self, index):
	        """Return a dict of lines in 'data' indexed by residue number or a nucleus.

	        The nucleus should be given as the input argument in the same form as
	        it appears in the xpk label line (H1, 15N for example)

	        Blank data lines are ignored. The residue number is the integer
	        before the first "." of the ``<index>.L`` field of each line.

	        Parameters
	        ----------
	        index : str
	            The nucleus to index data by.

	        Returns
	        -------
	        resdict : dict
	            Mappings of index nucleus to data line. Each residue number
	            (as a string) maps to the list of data lines for that residue.
	            The extra keys "maxres" and "minres" hold the largest and
	            smallest residue number as integers, or -1 for both when
	            there are no data lines. The same dict is stored as
	            ``self.dict``.

	        Raises
	        ------
	        KeyError
	            If a data line has no ``<index>.L`` field.
	        ValueError
	            If the residue part of that field is not an integer.

	        Examples
	        --------
	        >>> from Bio.NMR.xpktools import Peaklist
	        >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
	        >>> residue_d = peaklist.residue_dict('H1')
	        >>> sorted(residue_d.keys())
	        ['10', '3', '4', '5', '6', '7', '8', '9', 'maxres', 'minres']
	        >>> residue_d['10']
	        ['8 10.hn 7.663 0.021 0.010 ++ 0.000 10.n 118.341 0.324 0.010 +E 0.000 10.n 118.476 0.324 0.010 +E 0.000 0.49840 0.49840 0']

	        """
	        # None (rather than -1) marks "no residue seen yet", so that a
	        # genuine residue number of -1 is not mistaken for the unset state.
	        maxres = None
	        minres = None

	        self.dict = {}
	        for line in self.data:
	            # A blank line (e.g. at the end of the file) carries no entry
	            if not line.strip():
	                continue

	            # Cast the data line into the XpkEntry class
	            label = XpkEntry(line, self.datalabels).fields[index + ".L"]
	            res = int(label.split(".")[0])

	            maxres = res if maxres is None else max(maxres, res)
	            minres = res if minres is None else min(minres, res)

	            # Lines of the same residue are collected under one key
	            self.dict.setdefault(str(res), []).append(line)

	        # -1 is reported when there was no data at all
	        self.dict["maxres"] = -1 if maxres is None else maxres
	        self.dict["minres"] = -1 if minres is None else minres

	        return self.dict

	    def write_header(self, outfn):
	        """Write header lines from input file to the file named ``outfn``.

	        The file is opened for writing, so existing content is replaced.
	        Each of the six header lines is followed by a newline.
	        """
	        header = (
	            self.firstline,
	            self.axislabels,
	            self.dataset,
	            self.sw,
	            self.sf,
	            self.datalabels,
	        )
	        with open(outfn, "w") as outfile:
	            outfile.writelines(line + "\n" for line in header)


	def replace_entry(line, fieldn, newentry):
	    """Replace an entry in a string by the field number.

	    No padding is implemented currently. Spacing will change if
	    the original field entry and the new field entry are of
	    different lengths.

	    Parameters
	    ----------
	    line : str
	        The space delimited line to modify.
	    fieldn : int
	        Number of the field to replace, counted starting with 1.
	    newentry : object
	        The replacement; it is converted with ``str`` before insertion.

	    Returns
	    -------
	    newline : str
	        A copy of ``line`` with the field replaced.

	    Raises
	    ------
	    IndexError
	        If nothing but whitespace remains at the located start position.

	    """
	    # This function depends on xpktools._find_start_entry
	    start = _find_start_entry(line, fieldn)

	    # Field 1 is reported at index 0 even when the line begins with
	    # blanks; step over them so that the field itself is replaced and
	    # not the leading padding.
	    while start < len(line) and line[start].isspace():
	        start += 1

	    remainder = line[start:].split()
	    if not remainder:
	        raise IndexError("line has no field number %r" % (fieldn,))

	    end = start + len(remainder[0])
	    return line[:start] + str(newentry) + line[end:]


	def _find_start_entry(line, n):
	"""Find the starting character for entry ``n`` in a space delimited ``line`` (PRIVATE).

	n is counted starting with 1.
	The n=1 field by definition begins at the first character.

	Returns
	-------
	starting character : str
	The starting character for entry ``n``.

	"""
	# This function is used by replace_entry

	if n == 1:
	return 0 # Special case

	# Count the number of fields by counting spaces
	c = 1
	leng = len(line)

	# Initialize variables according to whether the first character
	# is a space or a character
	if line[0] == " ":
	infield = False
	field = 0
	else:
	infield = True
	field = 1

	while c < leng and field < n:
	if infield:
	if line[c] == " " and line[c - 1] != " ":
	infield = False
	else:
	if line[c] != " ":
	infield = True
	field += 1

	c += 1

	return c - 1


	def data_table(fn_list, datalabel, keyatom):
	    """Generate a data table from a list of input xpk files.

	    The table has one row per residue number, from the smallest to the
	    largest number found in any of the files. A row is the residue
	    number followed by one tab separated column per input file and a
	    closing newline. A column holds the ``datalabel`` value of the first
	    data line of that residue in the file, or "*" when the file has no
	    line for the residue.

	    Parameters
	    ----------
	    fn_list : list
	        List of .xpk file names.
	    datalabel : str
	        The data element reported.
	    keyatom : str
	        The name of the nucleus used as an index for the data table.

	    Returns
	    -------
	    outlist : list
	        List of table rows indexed by ``keyatom``. The list is empty when
	        ``fn_list`` is empty or none of the files holds any data line.

	    Raises
	    ------
	    KeyError
	        If a reported data line has no ``datalabel`` field.

	    """
	    # TODO - Clarify this docstring, add an example?
	    dict_list, label_line_list = _read_dicts(fn_list, keyatom)

	    # Find global max and min residue numbers. A peak list without data
	    # holds nothing but the "maxres"/"minres" bookkeeping keys, whose
	    # placeholder values must not widen the residue range.
	    populated = [
	        dictionary
	        for dictionary in dict_list
	        if any(key not in ("maxres", "minres") for key in dictionary)
	    ]
	    if not populated:
	        return []

	    minr = min(dictionary["minres"] for dictionary in populated)
	    maxr = max(dictionary["maxres"] for dictionary in populated)

	    outlist = []
	    for res in range(minr, maxr + 1):  # s.t. res numbers
	        key = str(res)
	        cells = [key]
	        for dictionary, label in zip(dict_list, label_line_list):  # s.t. dictionaries
	            if key in dictionary:
	                # Only the first line stored for the residue is reported
	                cells.append(XpkEntry(dictionary[key][0], label).fields[datalabel])
	            else:
	                cells.append("*")
	        outlist.append("\t".join(cells) + "\n")

	    return outlist


	def _read_dicts(fn_list, keyatom):
	"""Read multiple files into a list of residue dictionaries (PRIVATE)."""
	dict_list = []
	datalabel_list = []
	for fn in fn_list:
	peaklist = Peaklist(fn)
	dictionary = peaklist.residue_dict(keyatom)
	dict_list.append(dictionary)
	datalabel_list.append(peaklist.datalabels)

	return [dict_list, datalabel_list]


	# Only when this module is executed as a script: call the run_doctest
	# helper from Bio._utils (presumably it runs the docstring examples of
	# this module -- the helper itself is defined outside this file).
	if __name__ == "__main__":
	from Bio._utils import run_doctest

	run_doctest()