# sashtech's picture
# Upload 9 files
# 7b96a1b verified
import unicodedata
from collections import OrderedDict
from functools import total_ordering
def get_match_ordered_dict():
    """Build the ordered mapping of match field names to their types.

    A new ``OrderedDict`` is created on every call, so callers may modify
    the result freely. Insertion order is the order of the assignments
    below.

    Returns:
        OrderedDict: field name -> callable used to coerce that field.
    """
    field_types = OrderedDict()
    field_types['ruleId'] = str
    field_types['message'] = str
    field_types['replacements'] = list
    field_types['offsetInContext'] = int
    field_types['context'] = str
    field_types['offset'] = int
    field_types['errorLength'] = int
    field_types['category'] = str
    field_types['ruleIssueType'] = str
    field_types['sentence'] = str
    return field_types
""" Sample match JSON:
{
'message': 'Possible spelling mistake found.',
'shortMessage': 'Spelling mistake',
'replacements': [{'value': 'newt'}, {'value': 'not'}, {'value': 'new', 'shortDescription': 'having just been made'}, {'value': 'news'}, {'value': 'foot', 'shortDescription': 'singular'}, {'value': 'root', 'shortDescription': 'underground organ of a plant'}, {'value': 'boot'}, {'value': 'noon'}, {'value': 'loot', 'shortDescription': 'plunder'}, {'value': 'moot'}, {'value': 'Root'}, {'value': 'soot', 'shortDescription': 'carbon black'}, {'value': 'newts'}, {'value': 'nook'}, {'value': 'Lieut'}, {'value': 'coot'}, {'value': 'hoot'}, {'value': 'toot'}, {'value': 'snoot'}, {'value': 'neut'}, {'value': 'nowt'}, {'value': 'Noor'}, {'value': 'noob'}],
'offset': 8,
'length': 4,
'context': {'text': 'This is noot okay. ', 'offset': 8, 'length': 4},
'sentence': 'This is noot okay.',
'type': {'typeName': 'Other'},
'rule': {'id': 'MORFOLOGIK_RULE_EN_US', 'description': 'Possible spelling mistake', 'issueType': 'misspelling', 'category': {'id': 'TYPOS', 'name': 'Possible Typo'}},
'ignoreForIncompleteSentence': False,
'contextForSureMatch': 0
}
"""
def auto_type(obj):
    """Convert *obj* to a number when possible, otherwise return it as is.

    ``int(obj)`` is tried first, then ``float(obj)``. Because ``int`` is
    tried first, a ``float`` argument is truncated toward zero (``3.7``
    becomes ``3``) and a ``bool`` becomes ``0`` or ``1``.

    Args:
        obj: Any value, typically a string.

    Returns:
        An ``int`` or ``float`` if one of the conversions succeeds,
        otherwise *obj* unchanged.
    """
    try:
        return int(obj)
    except (TypeError, ValueError, OverflowError):
        # TypeError: obj is not a string/number (e.g. None, a list).
        # ValueError: non-integer text, or NaN.
        # OverflowError: an infinite float.
        pass
    try:
        return float(obj)
    except (TypeError, ValueError):
        return obj
@total_ordering
class Match:
    """Hold information about where a rule matches text.

    Only the fields named by ``get_match_ordered_dict()`` are stored. Each
    one is coerced to its declared type on assignment; assignments to any
    other name are silently ignored. A known field that was never assigned
    reads as ``None``.

    Instances compare (``==``, ``<`` and the orderings derived by
    ``total_ordering``) by their field values in declaration order. Since
    ``__eq__`` is defined without ``__hash__``, instances are unhashable.
    """

    def __init__(self, attrib):
        """Flatten a raw match mapping into attributes on this instance.

        Args:
            attrib: Mapping with at least the keys ``rule`` (holding ``id``,
                ``issueType`` and ``category['id']``), ``context`` (holding
                ``offset`` and ``text``), ``replacements`` (items with a
                ``value`` key), ``length`` and ``message``. The mapping
                passed in is not modified.

        Raises:
            KeyError: If one of the keys listed above is missing.
        """
        # Work on a shallow copy so the caller's mapping is left untouched.
        attrib = dict(attrib)
        # Process rule.
        rule = attrib.pop('rule')
        attrib['category'] = rule['category']['id']
        attrib['ruleId'] = rule['id']
        attrib['ruleIssueType'] = rule['issueType']
        # Process context.
        context = attrib['context']
        attrib['offsetInContext'] = context['offset']
        attrib['context'] = context['text']
        # Process replacements.
        attrib['replacements'] = [r['value'] for r in attrib['replacements']]
        # Rename error length.
        attrib['errorLength'] = attrib['length']
        # Normalize unicode
        attrib['message'] = unicodedata.normalize("NFKC", attrib['message'])
        # Store objects on self; __setattr__ drops keys that are not
        # known fields (e.g. 'length', which was renamed above).
        for k, v in attrib.items():
            setattr(self, k, v)

    def __repr__(self):
        """Return ``Match({...})`` listing the stored fields in order."""
        names = list(get_match_ordered_dict())
        # Any extra instance attributes follow the known fields; sorted so
        # the output is deterministic.
        names += sorted(set(self.__dict__).difference(names))
        parts = [
            '{!r}: {!r}'.format(name, getattr(self, name))
            for name in names
            if name in self.__dict__ and not name.startswith('_')
        ]
        return '{}({{{}}})'.format(self.__class__.__name__, ', '.join(parts))

    def __str__(self):
        """Return a multi-line summary with the error marked by carets."""
        ruleId = self.ruleId
        s = 'Offset {}, length {}, Rule ID: {}'.format(
            self.offset, self.errorLength, ruleId)
        if self.message:
            s += '\nMessage: {}'.format(self.message)
        if self.replacements:
            s += '\nSuggestion: {}'.format('; '.join(self.replacements))
        # Second line underlines the matched span inside the context.
        s += '\n{}\n{}'.format(
            self.context, ' ' * self.offsetInContext + '^' * self.errorLength
        )
        return s

    @property
    def matchedText(self):
        """ Returns the text that garnered the error (without its surrounding context).
        """
        start = self.offsetInContext
        return self.context[start:start + self.errorLength]

    def __eq__(self, other):
        """Compare field values with the items of *other*.

        Returns ``NotImplemented`` when *other* cannot be iterated, so
        comparing with an unrelated object yields ``False`` instead of
        raising ``TypeError``.
        """
        try:
            other_values = list(other)
        except TypeError:
            return NotImplemented
        return list(self) == other_values

    def __lt__(self, other):
        """Order by field values; ``NotImplemented`` if *other* is not iterable."""
        try:
            other_values = list(other)
        except TypeError:
            return NotImplemented
        return list(self) < other_values

    def __iter__(self):
        """Yield the value of every known field, in declaration order."""
        return (getattr(self, attr) for attr in get_match_ordered_dict())

    def __setattr__(self, key, value):
        """Store *value* under *key*, coerced to the field's declared type.

        Names that are not known fields are ignored without error.
        """
        # Look the converter up first so that a KeyError raised by the
        # conversion itself is not mistaken for an unknown field.
        caster = get_match_ordered_dict().get(key)
        if caster is None:
            return
        super().__setattr__(key, caster(value))

    def __getattr__(self, name):
        """Fallback lookup, called only when normal attribute access fails.

        Returns:
            ``None`` for a known field that has not been assigned.

        Raises:
            AttributeError: If *name* is not a known field.
        """
        if name not in get_match_ordered_dict():
            raise AttributeError('{!r} object has no attribute {!r}'
                                 .format(self.__class__.__name__, name))
        return None