c4ai-usp
/

gap-text2sql

antonlabate

ver 1.3

d758c99 over 2 years ago

1.38 kB

	import abc

	class AbstractPreproc(metaclass=abc.ABCMeta):
	    """Interface for preprocessing data according to the model's liking.

	    Some tasks normally performed here:
	    - Constructing a vocabulary from the training data
	    - Transforming the items in some way, such as parsing the AST
	    - Loading and providing the pre-processed data to the model

	    Every method below is abstract, so a concrete subclass must override
	    all of them before it can be instantiated.

	    TODO:
	    - Allow transforming items in a streaming fashion without loading all
	      of them into memory first
	    """

	    @abc.abstractmethod
	    def validate_item(self, item, section):
	        """Check whether ``item`` can be successfully preprocessed.

	        Args:
	            item: The raw item to inspect.
	            section: The section the item belongs to (presumably a dataset
	                split name such as train/val -- confirm against callers).

	        Returns:
	            A boolean and an arbitrary object.
	        """

	    @abc.abstractmethod
	    def add_item(self, item, section, validation_info):
	        """Add an item to be preprocessed.

	        Args:
	            item: The raw item to add.
	            section: The section the item belongs to.
	            validation_info: Extra data accompanying the item (presumably
	                the arbitrary object returned by ``validate_item`` --
	                confirm against callers).
	        """

	    @abc.abstractmethod
	    def clear_items(self):
	        """Clear the preprocessed items."""

	    @abc.abstractmethod
	    def save(self):
	        """Mark that all items have been preprocessed and save state to disk.

	        Used in preprocess.py, after reading all of the data.
	        """

	    @abc.abstractmethod
	    def load(self):
	        """Load state from disk."""

	    @abc.abstractmethod
	    def dataset(self, section):
	        """Return a ``torch.utils.data.Dataset`` instance for ``section``."""