File size: 4,721 Bytes
7b96a1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import unicodedata
from collections import OrderedDict
from functools import total_ordering

def get_match_ordered_dict():
    """Return a fresh ordered mapping of match attribute names to types.

    The insertion order of the keys defines the canonical attribute order,
    and each value is the callable used to coerce the attribute of that
    name. A new ``OrderedDict`` is built on every call, so callers may
    mutate the result freely.
    """
    field_types = OrderedDict()
    # Rule identification and human-readable description.
    field_types['ruleId'] = str
    field_types['message'] = str
    field_types['replacements'] = list
    # Location of the match inside its context snippet.
    field_types['offsetInContext'] = int
    field_types['context'] = str
    # Location and extent of the match.
    field_types['offset'] = int
    field_types['errorLength'] = int
    # Classification of the rule that fired.
    field_types['category'] = str
    field_types['ruleIssueType'] = str
    field_types['sentence'] = str
    return field_types

""" Sample match (JSON response parsed into a Python dict):
    {
        'message': 'Possible spelling mistake found.', 
        'shortMessage': 'Spelling mistake', 
        'replacements': [{'value': 'newt'}, {'value': 'not'}, {'value': 'new', 'shortDescription': 'having just been made'}, {'value': 'news'}, {'value': 'foot', 'shortDescription': 'singular'}, {'value': 'root', 'shortDescription': 'underground organ of a plant'}, {'value': 'boot'}, {'value': 'noon'}, {'value': 'loot', 'shortDescription': 'plunder'}, {'value': 'moot'}, {'value': 'Root'}, {'value': 'soot', 'shortDescription': 'carbon black'}, {'value': 'newts'}, {'value': 'nook'}, {'value': 'Lieut'}, {'value': 'coot'}, {'value': 'hoot'}, {'value': 'toot'}, {'value': 'snoot'}, {'value': 'neut'}, {'value': 'nowt'}, {'value': 'Noor'}, {'value': 'noob'}], 
        'offset': 8, 
        'length': 4, 
        'context': {'text': 'This is noot okay. ', 'offset': 8, 'length': 4}, 'sentence': 'This is noot okay.', 
        'type': {'typeName': 'Other'}, 
        'rule': {'id': 'MORFOLOGIK_RULE_EN_US', 'description': 'Possible spelling mistake', 'issueType': 'misspelling', 'category': {'id': 'TYPOS', 'name': 'Possible Typo'}}, 
        'ignoreForIncompleteSentence': False, 
        'contextForSureMatch': 0
    }

"""
def auto_type(obj):
    """Convert *obj* to ``int`` or ``float`` when possible.

    ``int`` is tried first and ``float`` second; the first conversion that
    succeeds wins. Because ``int`` is tried first, a ``float`` argument is
    truncated to an integer.

    Args:
        obj: The value to convert, typically a string such as ``"8"`` or
            ``"0.5"``.

    Returns:
        The converted number, or *obj* itself unchanged when neither
        conversion applies (for example non-numeric text, ``None`` or a
        list).
    """
    for cast in (int, float):
        try:
            return cast(obj)
        except (TypeError, ValueError):
            # ValueError: text that is not a number of this kind.
            # TypeError: an object that cannot be converted at all
            # (e.g. None); it is passed through like non-numeric text.
            continue
    return obj

@total_ordering
class Match:
    """Hold information about where a rule matches text.

    An instance is built from one raw match dictionary and exposes a
    flattened view of it. Only attribute names listed by
    ``get_match_ordered_dict()`` can be stored; every stored value is
    coerced to the type declared there (see ``__setattr__``).

    Instances iterate over their attribute values in the declared order,
    and equality/ordering compare those value lists. Because ``__eq__`` is
    defined without ``__hash__``, instances are not hashable.
    """

    def __init__(self, attrib):
        """Flatten a raw match dictionary into typed attributes.

        Args:
            attrib: Mapping with at least the keys ``rule`` (holding
                ``id``, ``issueType`` and ``category['id']``), ``context``
                (holding ``offset`` and ``text``), ``replacements`` (a
                list of dicts each with a ``value`` key), ``length`` and
                ``message``. The mapping is not modified.

        Raises:
            KeyError: If one of the required keys is missing.
        """
        # Work on a shallow copy so the caller's dictionary keeps its
        # original 'rule', 'context' and 'replacements' entries.
        attrib = dict(attrib)
        # Process rule.
        rule = attrib.pop('rule')
        attrib['category'] = rule['category']['id']
        attrib['ruleId'] = rule['id']
        attrib['ruleIssueType'] = rule['issueType']
        # Process context.
        context = attrib['context']
        attrib['offsetInContext'] = context['offset']
        attrib['context'] = context['text']
        # Process replacements: keep only the suggested strings.
        attrib['replacements'] = [r['value'] for r in attrib['replacements']]
        # Rename error length.
        attrib['errorLength'] = attrib['length']
        # Normalize unicode.
        attrib['message'] = unicodedata.normalize("NFKC", attrib['message'])
        # Store objects on self; keys that are not declared attributes
        # (e.g. 'length', 'type') are dropped by __setattr__.
        for k, v in attrib.items():
            setattr(self, k, v)

    def __repr__(self):
        """Return ``Match({...})`` listing the stored attributes.

        Declared attributes come first, in declaration order; any other
        public instance attributes follow in sorted order.
        """
        known = list(get_match_ordered_dict())
        # Sort the extras so the representation is deterministic.
        extras = sorted(set(self.__dict__).difference(known))
        names = [name for name in known + extras
                 if name in self.__dict__ and not name.startswith('_')]
        body = ', '.join(
            '{!r}: {!r}'.format(name, getattr(self, name)) for name in names
        )
        return '{}({{{}}})'.format(self.__class__.__name__, body)

    def __str__(self):
        """Return a multi-line, human-readable description of the match.

        The last two lines show the context and a row of ``^`` characters
        placed under the matched span.
        """
        ruleId = self.ruleId
        s = 'Offset {}, length {}, Rule ID: {}'.format(
            self.offset, self.errorLength, ruleId)
        if self.message:
            s += '\nMessage: {}'.format(self.message)
        if self.replacements:
            s += '\nSuggestion: {}'.format('; '.join(self.replacements))
        s += '\n{}\n{}'.format(
            self.context, ' ' * self.offsetInContext + '^' * self.errorLength
        )
        return s

    @property
    def matchedText(self):
        """ Returns the text that garnered the error (without its surrounding context).
        """
        return self.context[self.offsetInContext:self.offsetInContext+self.errorLength]

    def __eq__(self, other):
        """Compare attribute values, in declared order, with *other*.

        *other* may be any iterable; a non-iterable operand (such as
        ``None``) yields ``NotImplemented`` so Python falls back to its
        default comparison instead of raising ``TypeError``.
        """
        try:
            other_values = list(other)
        except TypeError:
            return NotImplemented
        return list(self) == other_values

    def __lt__(self, other):
        """Order by attribute values, in declared order.

        Returns ``NotImplemented`` for a non-iterable *other*, which makes
        Python raise its standard ``TypeError`` for unsupported operands.
        """
        try:
            other_values = list(other)
        except TypeError:
            return NotImplemented
        return list(self) < other_values

    def __iter__(self):
        """Yield the value of every declared attribute, in declared order.

        Attributes that were never set are yielded as ``None`` (see
        ``__getattr__``).
        """
        return (getattr(self, attr) for attr in get_match_ordered_dict())

    def __setattr__(self, key, value):
        """Store *value* under *key*, coerced to the declared type.

        Names that are not declared by ``get_match_ordered_dict()`` are
        silently ignored.
        """
        try:
            caster = get_match_ordered_dict()[key]
        except KeyError:
            # Undeclared attribute: drop it.
            return
        super().__setattr__(key, caster(value))

    def __getattr__(self, name):
        """Provide a fallback for attributes missing from the instance.

        Python calls this only when normal lookup fails. A declared
        attribute that has not been set evaluates to ``None``.

        Raises:
            AttributeError: If *name* is not a declared attribute.
        """
        if name not in get_match_ordered_dict():
            raise AttributeError('{!r} object has no attribute {!r}'
                                 .format(self.__class__.__name__, name))
        return None