import abc


class AbstractPreproc(metaclass=abc.ABCMeta):
    """Interface for preprocessing data into the form a model expects.

    Tasks normally performed by an implementation:
    - Constructing a vocabulary from the training data
    - Transforming the items in some way, such as parsing the AST
    - Loading and providing the pre-processed data to the model

    Every method below is abstract, so a subclass must override all of
    them before it can be instantiated.

    TODO:
    - Allow transforming items in a streaming fashion without loading all
      of them into memory first
    """

    @abc.abstractmethod
    def validate_item(self, item, section):
        """Check whether ``item`` can be successfully preprocessed.

        Returns a boolean and an arbitrary object.
        """
        # NOTE(review): the arbitrary object is presumably what callers hand
        # back to ``add_item`` as ``validation_info`` -- confirm with callers.

    @abc.abstractmethod
    def add_item(self, item, section, validation_info):
        """Add an item to be preprocessed."""

    @abc.abstractmethod
    def clear_items(self):
        """Clear the preprocessed items."""

    @abc.abstractmethod
    def save(self):
        """Mark that all of the items have been preprocessed; save state to disk.

        Used in preprocess.py, after reading all of the data.
        """

    @abc.abstractmethod
    def load(self):
        """Load state from disk."""

    @abc.abstractmethod
    def dataset(self, section):
        """Return a ``torch.utils.data.Dataset`` instance for ``section``."""