Spaces:
Sleeping
Sleeping
import unicodedata | |
from collections import OrderedDict | |
from functools import total_ordering | |
def get_match_ordered_dict():
    """Return the ordered mapping of match attribute names to their types.

    The insertion order of the keys is the canonical attribute order.  Each
    value is the callable (``str``, ``int`` or ``list``) associated with that
    attribute.

    A new ``OrderedDict`` is built on every call, so callers may mutate the
    result without affecting later calls.
    """
    return OrderedDict([
        ('ruleId', str),
        ('message', str),
        ('replacements', list),
        ('offsetInContext', int),
        ('context', str),
        ('offset', int),
        ('errorLength', int),
        ('category', str),
        ('ruleIssueType', str),
        ('sentence', str),
    ])
""" Sample match JSON:
    {
        'message': 'Possible spelling mistake found.',
        'shortMessage': 'Spelling mistake',
        'replacements': [{'value': 'newt'}, {'value': 'not'}, {'value': 'new', 'shortDescription': 'having just been made'}, {'value': 'news'}, {'value': 'foot', 'shortDescription': 'singular'}, {'value': 'root', 'shortDescription': 'underground organ of a plant'}, {'value': 'boot'}, {'value': 'noon'}, {'value': 'loot', 'shortDescription': 'plunder'}, {'value': 'moot'}, {'value': 'Root'}, {'value': 'soot', 'shortDescription': 'carbon black'}, {'value': 'newts'}, {'value': 'nook'}, {'value': 'Lieut'}, {'value': 'coot'}, {'value': 'hoot'}, {'value': 'toot'}, {'value': 'snoot'}, {'value': 'neut'}, {'value': 'nowt'}, {'value': 'Noor'}, {'value': 'noob'}],
        'offset': 8,
        'length': 4,
        'context': {'text': 'This is noot okay. ', 'offset': 8, 'length': 4},
        'sentence': 'This is noot okay.',
        'type': {'typeName': 'Other'},
        'rule': {'id': 'MORFOLOGIK_RULE_EN_US', 'description': 'Possible spelling mistake', 'issueType': 'misspelling', 'category': {'id': 'TYPOS', 'name': 'Possible Typo'}},
        'ignoreForIncompleteSentence': False,
        'contextForSureMatch': 0
    }
"""
def auto_type(obj):
    """Convert *obj* to ``int`` or ``float`` when possible, else return it.

    ``int(obj)`` is tried first, then ``float(obj)``; if both fail the
    original object is returned unchanged.

    Note: because ``int()`` is tried first, a ``float`` input is truncated
    (``auto_type(3.7)`` returns ``3``), whereas the string ``"3.7"`` becomes
    the float ``3.7``.
    """
    try:
        return int(obj)
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-integer text; TypeError: unsupported type such as
        # None or a list; OverflowError: int(float('inf')).
        pass
    try:
        return float(obj)
    except (ValueError, TypeError):
        return obj
@total_ordering
class Match:
    """Hold information about where a rule matches text.

    An instance is built from a single match dict.  Only the attribute names
    listed by ``get_match_ordered_dict()`` are stored; each stored value is
    passed through the type associated with its name.  Iterating a ``Match``
    yields those attributes in that canonical order, and equality/ordering
    compare the resulting value lists.
    """

    # Defining __eq__ already makes instances unhashable; say so explicitly.
    __hash__ = None

    def __init__(self, attrib):
        """Flatten the match dict *attrib* and store its fields on ``self``.

        Reads ``attrib['rule']`` (``id``, ``issueType``, ``category.id``),
        ``attrib['context']`` (``offset``, ``text``), ``attrib['replacements']``
        (a list of dicts with a ``value`` key), ``attrib['length']`` and
        ``attrib['message']``.

        Raises:
            KeyError: if one of the keys above is missing.
        """
        # Work on a shallow copy so the caller's dict is not modified; the
        # nested dicts are only read, never written.
        attrib = dict(attrib)

        # Process rule.
        rule = attrib.pop('rule')
        attrib['category'] = rule['category']['id']
        attrib['ruleId'] = rule['id']
        attrib['ruleIssueType'] = rule['issueType']

        # Process context.
        context = attrib['context']
        attrib['offsetInContext'] = context['offset']
        attrib['context'] = context['text']

        # Process replacements.
        attrib['replacements'] = [r['value'] for r in attrib['replacements']]

        # Rename error length.
        attrib['errorLength'] = attrib['length']

        # Normalize unicode.
        attrib['message'] = unicodedata.normalize("NFKC", attrib['message'])

        # Store objects on self; __setattr__ drops unknown keys.
        for key, value in attrib.items():
            setattr(self, key, value)

    def __repr__(self):
        """Return ``Match({...})`` listing the stored public attributes."""
        slots = list(get_match_ordered_dict())
        # Attributes outside the canonical list can only appear if __dict__
        # was written directly; sort them so the output is deterministic.
        slots += sorted(set(self.__dict__).difference(slots))
        body = ', '.join(
            '{!r}: {!r}'.format(attr, getattr(self, attr))
            for attr in slots
            if attr in self.__dict__ and not attr.startswith('_')
        )
        return '{}({})'.format(self.__class__.__name__, '{{{}}}'.format(body))

    def __str__(self):
        """Return a multi-line description with a caret line under the error."""
        s = 'Offset {}, length {}, Rule ID: {}'.format(
            self.offset, self.errorLength, self.ruleId)
        if self.message:
            s += '\nMessage: {}'.format(self.message)
        if self.replacements:
            s += '\nSuggestion: {}'.format('; '.join(self.replacements))
        s += '\n{}\n{}'.format(
            self.context, ' ' * self.offsetInContext + '^' * self.errorLength
        )
        return s

    def matchedText(self):
        """Return the text that garnered the error (without its context)."""
        start = self.offsetInContext
        return self.context[start:start + self.errorLength]

    @staticmethod
    def _values_of(other):
        """Return *other*'s items as a list, or ``NotImplemented``.

        ``NotImplemented`` is returned when *other* is not iterable, so the
        comparison operators can defer to Python's default handling.
        """
        try:
            iterator = iter(other)
        except TypeError:
            return NotImplemented
        return list(iterator)

    def __eq__(self, other):
        """Compare the canonical attribute values with those of *other*."""
        other_values = self._values_of(other)
        if other_values is NotImplemented:
            return NotImplemented
        return list(self) == other_values

    def __lt__(self, other):
        """Order by the canonical attribute values (list comparison)."""
        other_values = self._values_of(other)
        if other_values is NotImplemented:
            return NotImplemented
        return list(self) < other_values

    def __iter__(self):
        """Yield the attribute values in canonical order."""
        return (getattr(self, attr) for attr in get_match_ordered_dict())

    def __setattr__(self, key, value):
        """Store *value* under *key*, converted to the attribute's type.

        Keys that are not in ``get_match_ordered_dict()`` are silently
        ignored.
        """
        try:
            convert = get_match_ordered_dict()[key]
        except KeyError:
            # Unknown attribute: deliberately dropped.
            return
        super().__setattr__(key, convert(value))

    def __getattr__(self, name):
        """Return ``None`` for a known attribute that has not been set.

        Only called when normal lookup fails.

        Raises:
            AttributeError: if *name* is not a known match attribute.
        """
        if name not in get_match_ordered_dict():
            raise AttributeError('{!r} object has no attribute {!r}'
                                 .format(self.__class__.__name__, name))
        return None