Spaces:
Sleeping
Sleeping
File size: 5,839 Bytes
d916065 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
from typing import Tuple
import pytest
from nltk.metrics.distance import edit_distance
class TestEditDistance:
@pytest.mark.parametrize(
"left,right,substitution_cost,expecteds",
[
# Allowing transpositions reduces the number of edits required.
# with transpositions:
# e.g. "abc" -T-> "cba" -D-> "ca": 2 steps
#
# without transpositions:
# e.g. "abc" -D-> "ab" -D-> "a" -I-> "ca": 3 steps
("abc", "ca", 1, (2, 3)),
("abc", "ca", 5, (2, 3)), # Doesn't *require* substitutions
# Note, a substition_cost of higher than 2 doesn't make much
# sense, as a deletion + insertion is identical, and always
# costs 2.
#
#
# Transpositions don't always reduce the number of edits required:
# with or without transpositions:
# e.g. "wants" -D-> "wats" -D-> "was" -I-> "wasp": 3 steps
("wants", "wasp", 1, (3, 3)),
("wants", "wasp", 5, (3, 3)), # Doesn't *require* substitutions
#
#
# Ought to have the same results with and without transpositions
# with or without transpositions:
# e.g. "rain" -S-> "sain" -S-> "shin" -I-> "shine": 3 steps
# (but cost 5 if substitution_cost=2)
("rain", "shine", 1, (3, 3)),
("rain", "shine", 2, (5, 5)), # Does *require* substitutions
#
#
# Several potentially interesting typos
# with transpositions:
# e.g. "acbdef" -T-> "abcdef": 1 step
#
# without transpositions:
# e.g. "acbdef" -D-> "abdef" -I-> "abcdef": 2 steps
("acbdef", "abcdef", 1, (1, 2)),
("acbdef", "abcdef", 2, (1, 2)), # Doesn't *require* substitutions
#
#
# with transpositions:
# e.g. "lnaguaeg" -T-> "languaeg" -T-> "language": 2 steps
#
# without transpositions:
# e.g. "lnaguaeg" -D-> "laguaeg" -I-> "languaeg" -D-> "languag" -I-> "language": 4 steps
("lnaguaeg", "language", 1, (2, 4)),
("lnaguaeg", "language", 2, (2, 4)), # Doesn't *require* substitutions
#
#
# with transpositions:
# e.g. "lnaugage" -T-> "lanugage" -T-> "language": 2 steps
#
# without transpositions:
# e.g. "lnaugage" -S-> "lnangage" -D-> "langage" -I-> "language": 3 steps
# (but one substitution, so a cost of 4 if substition_cost = 2)
("lnaugage", "language", 1, (2, 3)),
("lnaugage", "language", 2, (2, 4)),
# Does *require* substitutions if no transpositions
#
#
# with transpositions:
# e.g. "lngauage" -T-> "lnaguage" -T-> "language": 2 steps
# without transpositions:
# e.g. "lngauage" -I-> "lanaguage" -D-> "language": 2 steps
("lngauage", "language", 1, (2, 2)),
("lngauage", "language", 2, (2, 2)), # Doesn't *require* substitutions
#
#
# with or without transpositions:
# e.g. "wants" -S-> "sants" -S-> "swnts" -S-> "swits" -S-> "swims" -D-> "swim": 5 steps
#
# with substitution_cost=2 and transpositions:
# e.g. "wants" -T-> "santw" -D-> "sntw" -D-> "stw" -D-> "sw"
# -I-> "swi" -I-> "swim": 6 steps
#
# with substitution_cost=2 and no transpositions:
# e.g. "wants" -I-> "swants" -D-> "swant" -D-> "swan" -D-> "swa" -D-> "sw"
# -I-> "swi" -I-> "swim": 7 steps
("wants", "swim", 1, (5, 5)),
("wants", "swim", 2, (6, 7)),
#
#
# with or without transpositions:
# e.g. "kitten" -S-> "sitten" -s-> "sittin" -I-> "sitting": 3 steps
# (but cost 5 if substitution_cost=2)
("kitten", "sitting", 1, (3, 3)),
("kitten", "sitting", 2, (5, 5)),
#
# duplicated letter
# e.g. "duplicated" -D-> "duplicated"
("duplicated", "duuplicated", 1, (1, 1)),
("duplicated", "duuplicated", 2, (1, 1)),
("very duplicated", "very duuplicateed", 2, (2, 2)),
],
)
def test_with_transpositions(
self, left: str, right: str, substitution_cost: int, expecteds: Tuple[int, int]
):
"""
Test `edit_distance` between two strings, given some `substitution_cost`,
and whether transpositions are allowed.
:param str left: First input string to `edit_distance`.
:param str right: Second input string to `edit_distance`.
:param int substitution_cost: The cost of a substitution action in `edit_distance`.
:param Tuple[int, int] expecteds: A tuple of expected outputs, such that `expecteds[0]` is
the expected output with `transpositions=True`, and `expecteds[1]` is
the expected output with `transpositions=False`.
"""
# Test the input strings in both orderings
for s1, s2 in ((left, right), (right, left)):
# zip with [True, False] to get the transpositions value
for expected, transpositions in zip(expecteds, [True, False]):
predicted = edit_distance(
s1,
s2,
substitution_cost=substitution_cost,
transpositions=transpositions,
)
assert predicted == expected
|