Spaces:

aakash0017
/

DrVai-Rag-Testing

No application file

File size: 9,276 Bytes

b7731cd

# Copyright 2004 by Bob Bussell.  All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
"""Tools to manipulate data from nmrview .xpk peaklist files."""


# Number of header lines at the top of an nmrview .xpk file; Peaklist reads
# six header lines (the sixth holds the data labels) before the peak records.
HEADERLEN = 6


class XpkEntry:
    """Dictionary-style view of one data line from an nmrview .xpk file.

    A data (non-header) line is split on whitespace. Its first token is the
    entry number; the remaining tokens are paired, in order, with the
    whitespace-separated labels of the header's label line.

    Parameters
    ----------
    entry : str
        A data line from an nmrview .xpk file.
    headline : str
        The header line naming the data columns (typically the sixth
        header line, counting from one).

    Attributes
    ----------
    fields : dict
        Maps each header label to the matching token of the data line, so
        ``self.fields["H1.P"]`` gives the H1.P value. The extra key
        ``"entrynum"`` holds the first token of the line; it is absent when
        the line contains no tokens at all.

    """

    def __init__(self, entry, headline):
        """Split the data line and pair its values with the header labels."""
        tokens = entry.split()
        labels = headline.split()

        # The first token is the entry number, so the labelled values start
        # at the second token. zip() stops at the shorter sequence.
        fields = {}
        for label, value in zip(labels, tokens[1:]):
            fields[label] = value

        # A blank line has no tokens and therefore no entry number.
        if tokens:
            fields["entrynum"] = tokens[0]

        self.fields = fields


class Peaklist:
    """Provide access to header lines and data from a nmrview xpk file.

    Header file lines and file data are available as attributes.

    Parameters
    ----------
    infn : str
        The input nmrview filename.

    Attributes
    ----------
    firstline  : str
        The first line in the header.
    axislabels : str
        The axis labels.
    dataset    : str
        The label of the dataset.
    sw         : str
        The sw coordinates.
    sf         : str
        The sf coordinates.
    datalabels : str
        The labels of the entries.

    data : list
        File data after header lines.

    Examples
    --------
    >>> from Bio.NMR.xpktools import Peaklist
    >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
    >>> peaklist.firstline
    'label dataset sw sf '
    >>> peaklist.dataset
    'test.nv'
    >>> peaklist.sf
    '{599.8230 } { 60.7860 } { 60.7860 }'
    >>> peaklist.datalabels
    ' H1.L  H1.P  H1.W  H1.B  H1.E  H1.J  15N2.L  15N2.P  15N2.W  15N2.B  15N2.E  15N2.J  N15.L  N15.P  N15.W  N15.B  N15.E  N15.J  vol  int  stat '

    """

    # Attribute names of the six header lines, in file order. Shared by
    # __init__ (reading) and write_header (writing) so they cannot drift.
    _HEADER_ATTRS = ("firstline", "axislabels", "dataset", "sw", "sf", "datalabels")

    def __init__(self, infn):
        """Read the six header lines and all data lines from file ``infn``."""
        with open(infn) as infile:
            # Header lines are stored without their trailing newline; a file
            # shorter than the header yields empty strings for missing lines.
            for attr in self._HEADER_ATTRS:
                setattr(self, attr, infile.readline().rstrip("\n"))

            # Everything after the header is peak data, one entry per line.
            self.data = [line.rstrip("\n") for line in infile]

    def residue_dict(self, index):
        """Return a dict of lines in 'data' indexed by residue number or a nucleus.

        The nucleus should be given as the input argument in the same form as
        it appears in the xpk label line (H1, 15N for example)

        Parameters
        ----------
        index : str
            The nucleus to index data by.

        Returns
        -------
        resdict : dict
            Maps each residue number (as a str) to the list of data lines
            assigned to that residue for the given nucleus. Two extra keys,
            ``"maxres"`` and ``"minres"``, hold the largest and smallest
            residue number as int; both are -1 when there are no data lines.
            The same dict is also stored on the instance as ``self.dict``.

        Raises
        ------
        KeyError
            If a data line has no value for the ``<index>.L`` label.
        ValueError
            If the residue part of a label (the text before the first ".")
            is not an integer.

        Examples
        --------
        >>> from Bio.NMR.xpktools import Peaklist
        >>> peaklist = Peaklist('../Doc/examples/nmr/noed.xpk')
        >>> residue_d = peaklist.residue_dict('H1')
        >>> sorted(residue_d.keys())
        ['10', '3', '4', '5', '6', '7', '8', '9', 'maxres', 'minres']
        >>> residue_d['10']
        ['8  10.hn   7.663   0.021   0.010   ++   0.000   10.n   118.341   0.324   0.010   +E   0.000   10.n   118.476   0.324   0.010   +E   0.000  0.49840 0.49840 0']

        """
        # None marks "no residue seen yet". A numeric sentinel such as -1
        # would be indistinguishable from a real residue numbered -1.
        maxres = None
        minres = None
        label = index + ".L"

        self.dict = {}
        for line in self.data:
            # The label field looks like "10.hn"; the residue number is the
            # part before the first dot.
            assignment = XpkEntry(line, self.datalabels).fields[label]
            res = int(assignment.split(".")[0])

            if maxres is None or res > maxres:
                maxres = res
            if minres is None or res < minres:
                minres = res

            # Group lines by the normalised residue number.
            self.dict.setdefault(str(res), []).append(line)

        # Keep the historical -1 result for a peaklist without data lines.
        self.dict["maxres"] = -1 if maxres is None else maxres
        self.dict["minres"] = -1 if minres is None else minres

        return self.dict

    def write_header(self, outfn):
        """Write the six header lines of the input file to the file ``outfn``.

        ``outfn`` is a filename; the file is created or overwritten and each
        header line is terminated with a newline.
        """
        with open(outfn, "w") as outfile:
            outfile.writelines(
                getattr(self, attr) + "\n" for attr in self._HEADER_ATTRS
            )


def replace_entry(line, fieldn, newentry):
    """Return ``line`` with field number ``fieldn`` replaced by ``newentry``.

    Fields are counted from 1. The replacement is converted with ``str()``
    and inserted where the old field started. No padding is applied, so the
    column alignment of the rest of the line shifts whenever the old and new
    entries differ in length.
    """
    # The start offset of the field comes from xpktools._find_start_entry.
    begin = _find_start_entry(line, fieldn)

    # The old entry runs from ``begin`` to the end of the first
    # whitespace-delimited token found there.
    old_width = len(line[begin:].split()[0])
    end = begin + old_width

    return "".join((line[:begin], str(newentry), line[end:]))


def _find_start_entry(line, n):
    """Find the starting character for entry ``n`` in a space delimited ``line`` (PRIVATE).

    n is counted starting with 1.
    The n=1 field by definition begins at the first character.

    Returns
    -------
    starting character : str
        The starting character for entry ``n``.

    """
    # This function is used by replace_entry

    if n == 1:
        return 0  # Special case

    # Count the number of fields by counting spaces
    c = 1
    leng = len(line)

    # Initialize variables according to whether the first character
    #  is a space or a character
    if line[0] == " ":
        infield = False
        field = 0
    else:
        infield = True
        field = 1

    while c < leng and field < n:
        if infield:
            if line[c] == " " and line[c - 1] != " ":
                infield = False
            else:
                if line[c] != " ":
                    infield = True
                    field += 1

        c += 1

    return c - 1


def data_table(fn_list, datalabel, keyatom):
    """Generate a data table from a list of input xpk files.

    One row is produced for every residue number between the smallest and
    the largest residue number found in any of the files. Each row starts
    with the residue number, followed by one tab-separated column per input
    file, and ends with a newline. A column holds the ``datalabel`` value
    of the first data line recorded for that residue in that file, or
    ``*`` when the file has no line for the residue.

    Parameters
    ----------
    fn_list : list
        List of .xpk file names.
    datalabel : str
        The data element reported.
    keyatom : str
        The name of the nucleus used as an index for the data table.

    Returns
    -------
    outlist : list
       List of table rows indexed by ``keyatom``. Empty when ``fn_list``
       is empty.

    """
    # TODO - Clarify this docstring, add an example?

    # Materialise the input so that an empty iterable of any kind is
    # detected up front instead of failing on the first-element lookup.
    fn_list = list(fn_list)
    if not fn_list:
        return []

    dict_list, label_line_list = _read_dicts(fn_list, keyatom)

    # Find global max and min residue numbers
    minr = min(dictionary["minres"] for dictionary in dict_list)
    maxr = max(dictionary["maxres"] for dictionary in dict_list)

    outlist = []
    for res in range(minr, maxr + 1):  # s.t. res numbers
        key = str(res)
        cells = [key]
        # Each residue dictionary is paired with the label line of its file.
        for dictionary, label in zip(dict_list, label_line_list):
            if key in dictionary:
                cells.append(XpkEntry(dictionary[key][0], label).fields[datalabel])
            else:
                cells.append("*")
        outlist.append("\t".join(cells) + "\n")

    return outlist


def _read_dicts(fn_list, keyatom):
    """Read multiple files into a list of residue dictionaries (PRIVATE)."""
    dict_list = []
    datalabel_list = []
    for fn in fn_list:
        peaklist = Peaklist(fn)
        dictionary = peaklist.residue_dict(keyatom)
        dict_list.append(dictionary)
        datalabel_list.append(peaklist.datalabels)

    return [dict_list, datalabel_list]


if __name__ == "__main__":
    # When executed as a script, run the doctests embedded in the docstrings.
    from Bio import _utils

    _utils.run_doctest()