File size: 4,026 Bytes
d2a8669
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
from abc import abstractmethod
from functools import wraps

from aif360.datasets import Dataset
from aif360.decorating_metaclass import ApplyDecorator


# TODO: Use sklearn.exceptions.NotFittedError instead?
class NotFittedError(ValueError, AttributeError):
    """Error to be raised if `predict` or `transform` is called before `fit`.

    Derives from both :obj:`ValueError` and :obj:`AttributeError`, so an
    ``except`` clause written for either of those built-ins also catches this
    error.
    """

def addmetadata(func):
    """Decorator for instance methods which perform a transformation and return
    a new dataset.

    Automatically populates the `metadata` field of the new dataset to reflect
    details of the transformation that occurred, e.g.::

        {
            'transformer': 'TransformerClass.function_name',
            'params': kwargs_from_init,
            'previous': [all_datasets_used_by_func]
        }

    Input datasets are collected from positional *and* keyword arguments, so
    ``obj.transform(dataset=ds)`` records ``ds`` in ``'previous'`` exactly as
    ``obj.transform(ds)`` does. If the wrapped method returns anything other
    than a :obj:`Dataset`, that value is returned unchanged.

    Args:
        func (callable): Instance method to wrap. The instance it is bound to
            must expose a `_params` attribute.

    Returns:
        callable: Wrapper that calls `func` with the same arguments and
        annotates a returned :obj:`Dataset` with the metadata described above.
    """
    @wraps(func)
    def wrapper(self, *args, **kwargs):
        new_dataset = func(self, *args, **kwargs)
        if isinstance(new_dataset, Dataset):
            # Datasets passed positionally come first, followed by any that
            # were passed by keyword; looking only at `args` would silently
            # drop the latter from the recorded lineage.
            previous = [a for a in args if isinstance(a, Dataset)]
            previous.extend(v for v in kwargs.values()
                            if isinstance(v, Dataset))
            # Rebind to a copy so the update below does not modify a metadata
            # dict object that the result may share with another dataset.
            new_dataset.metadata = new_dataset.metadata.copy()
            new_dataset.metadata.update({
                'transformer': '{}.{}'.format(type(self).__name__, func.__name__),
                'params': self._params,
                'previous': previous
            })
        return new_dataset
    return wrapper


# Base class for `Transformer`, produced by handing `addmetadata` to
# `ApplyDecorator`.
# NOTE(review): presumably this causes methods of derived classes to be wrapped
# with `addmetadata` -- confirm against aif360.decorating_metaclass.
BaseClass = ApplyDecorator(addmetadata)

class Transformer(BaseClass):
    """Common abstract interface shared by all transformers.

    A transformer is any procedure that takes a :obj:`Dataset` and produces a
    new, modified Dataset. Pre-processing, in-processing, and post-processing
    algorithms all fall under this description.
    """

    @abstractmethod
    def __init__(self, **kwargs):
        """Set up a Transformer.

        Algorithm-specific configuration is supplied as keyword arguments and
        kept on the instance as `_params`.
        """
        self._params = kwargs

    def fit(self, dataset):
        """Fit a model to the input.

        The base implementation performs no training and simply hands back the
        instance.

        Args:
            dataset (Dataset): Input dataset.

        Returns:
            Transformer: Returns self.
        """
        return self

    def predict(self, dataset):
        """Produce a new dataset whose labels are predicted by applying this
        Transformer to the input.

        Args:
            dataset (Dataset): Input dataset.

        Returns:
            Dataset: Output dataset. `metadata` should reflect the details of
            this transformation.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        message = ("'predict' is not supported for this class. Perhaps you "
                   "meant 'transform' or 'fit_predict' instead?")
        raise NotImplementedError(message)

    def transform(self, dataset):
        """Produce a new dataset by applying this Transformer to the input.

        The result may differ from the input in `dataset.features`,
        `dataset.labels`, or both.

        Args:
            dataset (Dataset): Input dataset.

        Returns:
            Dataset: Output dataset. `metadata` should reflect the details of
            this transformation.

        Raises:
            NotImplementedError: Always, in this base implementation.
        """
        message = ("'transform' is not supported for this class. Perhaps you "
                   "meant 'predict' or 'fit_transform' instead?")
        raise NotImplementedError(message)

    def fit_predict(self, dataset):
        """Fit a model to the input, then predict labels for that same input.

        Same as `fit(dataset)` immediately followed by `predict(dataset)`.

        Args:
            dataset (Dataset): Input dataset.

        Returns:
            Dataset: Output dataset. `metadata` should reflect the details of
            this transformation.
        """
        fitted = self.fit(dataset)
        return fitted.predict(dataset)

    def fit_transform(self, dataset):
        """Fit a model to the input, then transform that same input.

        Same as `fit(dataset)` immediately followed by `transform(dataset)`.

        Args:
            dataset (Dataset): Input dataset.

        Returns:
            Dataset: Output dataset. `metadata` should reflect the details of
            this transformation.
        """
        fitted = self.fit(dataset)
        return fitted.transform(dataset)