Spaces:
No application file
No application file
File size: 2,229 Bytes
b7731cd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
"""Useful utilities for helping in parsing GenBank files."""
class FeatureValueCleaner:
    r"""Provide specialized capabilities for cleaning up values in features.

    This class is designed to provide a mechanism to clean up and process
    values in the key/value pairs of GenBank features. This is useful
    because in cases like::

         /translation="MED
         YDPWNLRFQSKYKSRDA"

    you'll otherwise end up with white space in it.

    This cleaning needs to be done on a case by case basis since it is
    impossible to interpret whether you should be concatenating everything
    (as in translations), or combining things with spaces (as might be
    the case with /notes).

    >>> cleaner = FeatureValueCleaner(["translation"])
    >>> cleaner
    FeatureValueCleaner(['translation'])
    >>> cleaner.clean_value("translation", "MED\nYDPWNLRFQSKYKSRDA")
    'MEDYDPWNLRFQSKYKSRDA'

    Every key listed for processing needs a matching ``_clean_<key>``
    method on the class; ``clean_value`` looks the method up by name.
    """

    # Keys cleaned when no explicit list is handed to the constructor.
    keys_to_process = ["translation"]

    def __init__(self, to_process=None):
        """Initialize with the keys we should deal with.

        Arguments:
         - to_process - container of feature key names whose values should
           be cleaned. When omitted (or None) a fresh copy of the class
           level ``keys_to_process`` list is used.

        """
        if to_process is None:
            # Copy rather than alias the class-level list, so that changing
            # one instance's keys cannot leak into the class default or
            # into other instances.
            to_process = list(self.keys_to_process)
        self._to_process = to_process

    def __repr__(self):
        """Return a string representation of the class."""
        return f"{self.__class__.__name__}({self._to_process!r})"

    def clean_value(self, key_name, value):
        """Clean the specified value and return it.

        If the value is not specified to be dealt with, the original value
        will be returned.

        Raises AssertionError if ``key_name`` is configured for processing
        but no ``_clean_<key_name>`` method exists.
        """
        if key_name not in self._to_process:
            return value

        # Dispatch by naming convention: key "foo" is handled by _clean_foo.
        try:
            cleaner = getattr(self, f"_clean_{key_name}")
        except AttributeError:
            raise AssertionError(f"No function to clean key: {key_name}") from None
        return cleaner(value)

    def _clean_translation(self, value):
        """Concatenate a translation value to one long protein string (PRIVATE)."""
        # split() with no argument drops every run of whitespace, including
        # the line breaks and indentation from wrapped GenBank lines.
        return "".join(value.split())
if __name__ == "__main__":
    # Only when executed directly as a script: call Biopython's doctest
    # helper (presumably runs the doctests embedded in this module).
    from Bio._utils import run_doctest as _run_module_doctests

    _run_module_doctests()
|