antonlabate
ver 1.3
d758c99
import abc
class AbstractPreproc(metaclass=abc.ABCMeta):
    """Interface for preprocessing data into the form a model expects.

    Work typically carried out by an implementation:

    - Building a vocabulary from the training data.
    - Transforming the items in some way, for example by parsing the AST.
    - Loading the pre-processed data and providing it to the model.

    TODO:
    - Allow transforming items in a streaming fashion, without loading
      all of them into memory first.
    """

    @abc.abstractmethod
    def validate_item(self, item, section):
        """Check whether ``item`` can be successfully preprocessed.

        Returns a boolean and an arbitrary object.
        """

    @abc.abstractmethod
    def add_item(self, item, section, validation_info):
        """Add an item to be preprocessed.

        NOTE(review): ``validation_info`` is presumably the arbitrary object
        returned by ``validate_item`` -- confirm against callers.
        """

    @abc.abstractmethod
    def clear_items(self):
        """Clear the preprocessed items."""

    @abc.abstractmethod
    def save(self):
        """Mark that all items have been preprocessed and save state to disk.

        Used in preprocess.py, after reading all of the data.
        """

    @abc.abstractmethod
    def load(self):
        """Load state from disk."""

    @abc.abstractmethod
    def dataset(self, section):
        """Return a ``torch.utils.data.Dataset`` instance."""