File size: 2,933 Bytes
8044721
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
class Pipeline(object):
    """A chainable transformation applied to sequence data.

    A Pipeline wraps one callable (``convert_token``) and keeps an ordered
    list of Pipelines (``pipes``) that are run one after another when the
    object is called. A freshly built Pipeline's chain contains only itself.

    The input is assumed to be utf-8 encoded `str` (Python 3) or
    `unicode` (Python 2).

    Attributes:
        convert_token: The callable this Pipeline applies to its input.
        pipes: The ordered list of Pipelines run by ``__call__``.
    """

    def __init__(self, convert_token=None):
        """Build a Pipeline around a single callable.

        Arguments:
            convert_token: The callable to apply to input data. When None,
                ``Pipeline.identity`` is used instead. Default: None

        Raises:
            ValueError: If ``convert_token`` is neither None nor callable.
        """
        # Reject unusable arguments up front so no attribute is set on a
        # Pipeline that failed validation.
        if convert_token is not None and not callable(convert_token):
            raise ValueError("Pipeline input convert_token {} is not None "
                             "or callable".format(convert_token))
        self.convert_token = (
            Pipeline.identity if convert_token is None else convert_token
        )
        # The chain always starts out containing just this Pipeline.
        self.pipes = [self]

    def __call__(self, x, *args):
        """Run every Pipeline in ``self.pipes`` over the input, in order.

        Arguments:
            x: The input to process.
            Positional arguments: Passed unchanged to the `call` method of
                each Pipeline in the chain.

        Returns:
            The output of the last Pipeline in the chain.
        """
        result = x
        for stage in self.pipes:
            # Each stage consumes the previous stage's output.
            result = stage.call(result, *args)
        return result

    def call(self, x, *args):
        """Apply only this Pipeline's ``convert_token`` to the input.

        The rest of the chain in ``self.pipes`` is not consulted.

        Arguments:
            x: The input to convert. A `list` is converted element by
                element; anything else is converted as a single value.
            Positional arguments: Passed unchanged to ``convert_token``.

        Returns:
            A new list of converted elements when ``x`` is a list, otherwise
            the converted value.
        """
        if not isinstance(x, list):
            return self.convert_token(x, *args)
        return [self.convert_token(item, *args) for item in x]

    def add_before(self, pipeline):
        """Prepend a Pipeline's chain to this Pipeline's chain.

        Arguments:
            pipeline: The Pipeline to run before this one. Anything that is
                not a Pipeline is wrapped in a new Pipeline first.

        Returns:
            This Pipeline, so calls can be chained.
        """
        other = (
            pipeline if isinstance(pipeline, Pipeline) else Pipeline(pipeline)
        )
        leading = other.pipes[:]
        trailing = self.pipes[:]
        self.pipes = leading + trailing
        return self

    def add_after(self, pipeline):
        """Append a Pipeline's chain to this Pipeline's chain.

        Arguments:
            pipeline: The Pipeline to run after this one. Anything that is
                not a Pipeline is wrapped in a new Pipeline first.

        Returns:
            This Pipeline, so calls can be chained.
        """
        other = (
            pipeline if isinstance(pipeline, Pipeline) else Pipeline(pipeline)
        )
        leading = self.pipes[:]
        trailing = other.pipes[:]
        self.pipes = leading + trailing
        return self

    @staticmethod
    def identity(x):
        """Return the input unchanged (the same object, not a copy).

        This is here for serialization compatibility with pickle.
        """
        return x