Spaces:
No application file
No application file
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.
#
"""Useful utilities for helping in parsing GenBank files."""
class FeatureValueCleaner:
    r"""Provide specialized capabilities for cleaning up values in features.

    This class is designed to provide a mechanism to clean up and process
    values in the key/value pairs of GenBank features. This is useful
    because in cases like::

         /translation="MED
         YDPWNLRFQSKYKSRDA"

    you'll otherwise end up with white space in it.

    This cleaning needs to be done on a case by case basis since it is
    impossible to interpret whether you should be concatenating everything
    (as in translations), or combining things with spaces (as might be
    the case with /notes).

    Each key listed for processing must have a matching ``_clean_<key>``
    method on the class; ``clean_value`` looks that method up by name.

    >>> cleaner = FeatureValueCleaner(["translation"])
    >>> cleaner
    FeatureValueCleaner(['translation'])
    >>> cleaner.clean_value("translation", "MED\nYDPWNLRFQSKYKSRDA")
    'MEDYDPWNLRFQSKYKSRDA'
    """

    # Keys cleaned when no explicit list is handed to the constructor.
    keys_to_process = ["translation"]

    def __init__(self, to_process=keys_to_process):
        """Initialize with the keys we should deal with.

        Arguments:
         - to_process - container of feature key names whose values should
           be cleaned. It is stored by reference, not copied; the default is
           the class-level ``keys_to_process`` list itself, so instances
           built with the default all share that one list object.

        """
        self._to_process = to_process

    def __repr__(self):
        """Return a string representation of the class."""
        return f"{self.__class__.__name__}({self._to_process!r})"

    def clean_value(self, key_name, value):
        """Clean the specified value and return it.

        If the value is not specified to be dealt with, the original value
        will be returned.

        Arguments:
         - key_name - name of the feature key the value belongs to.
         - value - the raw value to clean.

        Raises AssertionError if ``key_name`` is listed for processing but
        no ``_clean_<key_name>`` method exists on this object.
        """
        if key_name in self._to_process:
            # Dispatch by naming convention: key "xyz" -> method "_clean_xyz".
            try:
                cleaner = getattr(self, f"_clean_{key_name}")
            except AttributeError:
                # "from None" hides the AttributeError from the traceback so
                # only the explanatory message below is reported.
                raise AssertionError(f"No function to clean key: {key_name}") from None
            value = cleaner(value)
        return value

    def _clean_translation(self, value):
        """Concatenate a translation value to one long protein string (PRIVATE)."""
        # str.split() with no argument splits on any run of whitespace
        # (spaces, tabs, newlines), so joining the pieces drops all of it.
        translation_parts = value.split()
        return "".join(translation_parts)
if __name__ == "__main__":
    # Executed directly as a script: call Biopython's doctest helper.
    # NOTE(review): presumably this runs the doctests embedded in this
    # module's docstrings - confirm against Bio._utils.run_doctest.
    from Bio._utils import run_doctest

    run_doctest()