Spaces:
No application file
No application file
# Copyright 2000 by Jeffrey Chang. All rights reserved.
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
"""Code to work with the prosite.doc file from Prosite.

See https://www.expasy.org/prosite/

Tested with:
 - Release 15.0, July 1998
 - Release 16.0, July 1999
 - Release 20.22, 13 November 2007
 - Release 20.43, 10 February 2009

Functions:
 - read   Read a Prodoc file containing exactly one Prodoc entry.
 - parse  Iterate over the entries in a Prodoc file.

Classes:
 - Record     Holds Prodoc data.
 - Reference  Holds data from a Prodoc reference.

"""
def read(handle):
    """Read in a record from a file with exactly one Prodoc record.

    Arguments:
     - handle - iterable of text lines (for example an open file)
       positioned at the start of a Prodoc record.

    Returns the parsed record, or None if the handle contains no record
    at all.

    Raises ValueError if any further content follows the first record.
    Blank lines and lines starting with ``//`` after the record are
    ignored, just as they are ignored in front of a record.
    """
    record = __read(handle)
    # We should have reached the end of the record by now; only the filler
    # lines that are also tolerated between records may remain.
    for line in handle:
        line = line.rstrip()
        if line and not line.startswith("//"):
            raise ValueError("More than one Prodoc record found")
    return record
def parse(handle):
    """Yield each record found in a Prodoc file, one after another.

    Iteration stops as soon as the reader returns a false value, i.e.
    when no further record can be read from handle.
    """
    record = __read(handle)
    while record:
        yield record
        record = __read(handle)
class Record:
    """Container for the data of a single Prodoc record.

    Attributes:
     - accession     Accession number of the record.
     - prosite_refs  List of tuples (prosite accession, prosite name).
     - text          Free format text.
     - references    List of reference objects.

    """

    def __init__(self):
        """Create an empty record; every field starts out blank."""
        self.accession = ""  # filled in from the accession line
        self.prosite_refs = []  # one (accession, name) tuple per entry
        self.text = ""  # free text, accumulated line by line
        self.references = []  # Reference objects, in file order
class Reference:
    """Container for a single citation of a Prodoc record.

    Attributes:
     - number    Number of the reference. (string)
     - authors   Names of the authors.
     - citation  Describes the citation.

    """

    def __init__(self):
        """Create an empty reference; every field starts out blank."""
        self.number = ""  # kept as a string
        self.authors = ""  # author names
        self.citation = ""  # citation text
# Below are private functions
def __read_prosite_reference_line(record, line):
    """Parse one ``{ACCESSION; NAME}`` line into record.prosite_refs.

    Arguments:
     - record - object whose ``prosite_refs`` list receives the new
       (accession, name) tuple.
     - line - the raw line; trailing whitespace is ignored.

    Raises ValueError if the line does not end with ``}`` or does not
    consist of exactly two fields separated by ``"; "``.
    """
    line = line.rstrip()
    # endswith() copes with an empty line, where indexing would fail.
    if not line.endswith("}"):
        raise ValueError(f"I don't understand the Prosite reference on line\n{line}")
    # Drop the surrounding braces, then split accession from name.
    fields = line[1:-1].split("; ")
    if len(fields) != 2:
        raise ValueError(f"I don't understand the Prosite reference on line\n{line}")
    acc, name = fields
    record.prosite_refs.append((acc, name))
def __read_text_line(record, line):
    """Append line, unchanged, to the free text of record.

    Always returns True.
    """
    record.text = record.text + line
    return True
def __read_reference_start(record, line):
    """Start a new reference from its first line and add it to record.

    Characters 1-2 of the line give the reference number. The text from
    column 4 onwards is stored, stripped, as the citation when the number
    starts with "E" and as the authors otherwise.
    """
    entry = Reference()
    entry.number = line[1:3].strip()
    # For an electronic reference the URL is on this line, instead of the
    # author, so the text goes into the citation field.
    target = "citation" if line[1] == "E" else "authors"
    setattr(entry, target, line[4:].strip())
    record.references.append(entry)
def __read_reference_line(record, line):
    """Add a continuation line to the last reference of record.

    Arguments:
     - record - object whose ``references`` list ends with the reference
       currently being read.
     - line - the raw continuation line.

    Returns False for a blank line and True once the line has been added
    to the reference.

    Raises ValueError if a non-blank line does not start with a space.
    """
    if not line.strip():
        return False
    if not line.startswith(" "):
        raise ValueError(f"I don't understand the reference line\n{line}")
    reference = record.references[-1]
    # A trailing comma means the author list continues on this line.
    # endswith() is used because the authors field is empty for electronic
    # references, where indexing the last character would fail.
    if reference.authors.endswith(","):
        reference.authors += line[4:].rstrip()
    else:
        reference.citation += line[5:]
    return True
def __read_copyright_line(record, line):
    """Consume one line of the copyright statement without storing it.

    Returns False when the line starts with "+----" and True for any
    other line. The record argument is not used.
    """
    return not line.startswith("+----")
def __read(handle):
    """Read the next Prodoc record from handle and return it.

    Blank lines and lines starting with ``//`` in front of the record are
    skipped. Returns None if the handle is exhausted before any record
    content is found.

    Raises ValueError if the accession line or the ``{BEGIN`` line is
    malformed, if a Prosite reference line cannot be parsed, or if the
    handle ends before ``{END}`` is reached.
    """
    # Skip blank lines between records
    for line in handle:
        line = line.rstrip()
        if line and not line.startswith("//"):
            break
    else:
        # Nothing but filler (or nothing at all) was left in the handle.
        return None
    record = Record()
    # Read the accession number
    if not line.startswith("{PDOC"):
        raise ValueError(f"Line does not start with '{{PDOC':\n{line}")
    if line[-1] != "}":
        raise ValueError(f"I don't understand accession line\n{line}")
    record.accession = line[1:-1]
    # Read the Prosite references
    for line in handle:
        if line.startswith("{PS"):
            __read_prosite_reference_line(record, line)
        else:
            break
    else:
        raise ValueError("Unexpected end of stream.")
    # Read the actual text
    if not line.startswith("{BEGIN"):
        raise ValueError(f"Line does not start with '{{BEGIN':\n{line}")
    # read_line is the handler for the section currently being read; it is
    # None while lines are being ignored.
    read_line = __read_text_line
    for line in handle:
        if line.startswith("{END}"):
            # Clean up the record and return
            for reference in record.references:
                reference.citation = reference.citation.rstrip()
                reference.authors = reference.authors.rstrip()
            return record
        # A reference starts with a bracketed two-character number followed
        # by a space, e.g. "[ 1] " or "[E1] ". Slices are used so that short
        # lines starting with "[" cannot raise IndexError.
        if line.startswith("[") and line[3:5] == "] ":
            __read_reference_start(record, line)
            read_line = __read_reference_line
        elif line.startswith("+----"):
            read_line = __read_copyright_line
        elif read_line:
            # A handler returning a false value ends its section; following
            # lines are ignored until another section starts.
            if not read_line(record, line):
                read_line = None
    raise ValueError("Unexpected end of stream.")