# aakash0017's picture
# Upload folder using huggingface_hub
# b7731cd
# This code is part of the Biopython distribution and governed by its
# license. Please see the LICENSE file that should have been included
# as part of this package.
#
"""Useful utilities for helping in parsing GenBank files."""
class FeatureValueCleaner:
    r"""Provide specialized capabilities for cleaning up values in features.

    This class is designed to provide a mechanism to clean up and process
    values in the key/value pairs of GenBank features. This is useful
    because in cases like::

         /translation="MED
         YDPWNLRFQSKYKSRDA"

    you'll otherwise end up with white space in it.

    This cleaning needs to be done on a case by case basis since it is
    impossible to interpret whether you should be concatenating everything
    (as in translations), or combining things with spaces (as might be
    the case with /notes).

    Every key selected for processing is dispatched to a method named
    ``_clean_<key>``; subclasses can support further keys by adding such
    methods and listing the keys when constructing the cleaner.

    >>> cleaner = FeatureValueCleaner(["translation"])
    >>> cleaner
    FeatureValueCleaner(['translation'])
    >>> cleaner.clean_value("translation", "MED\nYDPWNLRFQSKYKSRDA")
    'MEDYDPWNLRFQSKYKSRDA'
    """

    # Keys cleaned by default when no explicit list is given.
    keys_to_process = ["translation"]

    def __init__(self, to_process=keys_to_process):
        """Initialize with the keys we should deal with.

        Arguments:
         - to_process - iterable of feature key names whose values should
           be cleaned; defaults to the class-level ``keys_to_process``.

        """
        # Store a private copy: keeping a reference to the argument would
        # make every default-constructed instance share (and be able to
        # mutate) the class-level default list, and would also tie the
        # instance to later changes of a caller-supplied list.
        self._to_process = list(to_process)

    def __repr__(self):
        """Return a string representation of the class."""
        return f"{self.__class__.__name__}({self._to_process!r})"

    def clean_value(self, key_name, value):
        """Clean the specified value and return it.

        If the value is not specified to be dealt with, the original value
        will be returned.

        Arguments:
         - key_name - name of the feature key the value belongs to.
         - value - the raw value to clean.

        Raises AssertionError if ``key_name`` was selected for processing
        but no matching ``_clean_<key_name>`` method exists.
        """
        if key_name not in self._to_process:
            return value
        # Look up the cleaner by naming convention; only the lookup is
        # guarded so that errors raised inside a cleaner are not masked.
        try:
            cleaner = getattr(self, f"_clean_{key_name}")
        except AttributeError:
            raise AssertionError(f"No function to clean key: {key_name}") from None
        return cleaner(value)

    def _clean_translation(self, value):
        """Concatenate a translation value to one long protein string (PRIVATE)."""
        # str.split() with no argument splits on any run of whitespace
        # (spaces, tabs, newlines), so joining removes all of it.
        return "".join(value.split())
# When this file is executed directly as a script (not imported), load
# Biopython's run_doctest helper and call it. Presumably this runs the
# doctest examples embedded in this module's docstrings - confirm against
# Bio._utils. The import is kept local so that importing this module does
# not pull in the helper.
if __name__ == "__main__":
    from Bio._utils import run_doctest
    run_doctest()