Spaces:
Running
Running
File size: 2,425 Bytes
85b7206 0700cb3 85b7206 0700cb3 85b7206 0700cb3 85b7206 0700cb3 85b7206 0700cb3 85b7206 0700cb3 85b7206 0700cb3 85b7206 0700cb3 85b7206 0700cb3 85b7206 0700cb3 85b7206 0700cb3 85b7206 0700cb3 85b7206 0700cb3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import numpy as np
# ref from https://gitlab.com/-/snippets/1948157
# For some variants, look here https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#Python
# Pure/numpy python
def edit_distance_python2(a: str, b: str) -> np.ndarray | int:
"""A pure python levenshtein distance implementation"""
# This version is commutative, so as an optimization we force |a|>=|b|
if len(a) < len(b):
return edit_distance_python(b, a)
if len(b) == 0: # Can deal with empty sequences faster
return len(a)
# Only two rows are really needed: the one currently filled in, and the previous
distances = []
distances.append([i for i in range(len(b) + 1)])
distances.append([0 for _ in range(len(b) + 1)])
# We can prefill the first row:
costs = [0 for _ in range(3)]
for i, a_token in enumerate(a, start=1):
distances[1][0] += 1 # Deals with the first column.
for j, b_token in enumerate(b, start=1):
costs[0] = distances[1][j - 1] + 1
costs[1] = distances[0][j] + 1
costs[2] = distances[0][j - 1] + (0 if a_token == b_token else 1)
distances[1][j] = min(costs)
# Move to the next row:
distances[0][:] = distances[1][:]
return distances[1][len(b)]
#https://stackabuse.com/levenshtein-distance-and-text-similarity-in-python/
def edit_distance_python(seq1: str, seq2: str) -> np.ndarray:
"""A levenshtein distance implementation.
Args:
seq1 (str): First sequence.
seq2 (str): Second sequence.
Returns:
np.ndarray: The levenshtein distance between the two sequences.
"""
size_x = len(seq1) + 1
size_y = len(seq2) + 1
matrix = np.zeros((size_x, size_y))
for x in range(size_x):
matrix[x, 0] = x
for y in range(size_y):
matrix[0, y] = y
for x in range(1, size_x):
for y in range(1, size_y):
if seq1[x - 1] == seq2[y - 1]:
matrix[x, y] = min(
matrix[x - 1, y] + 1,
matrix[x - 1, y - 1],
matrix[x, y - 1] + 1
)
else:
matrix[x, y] = min(
matrix[x - 1, y] + 1,
matrix[x - 1, y - 1] + 1,
matrix[x, y - 1] + 1
)
#print (matrix)
return matrix[size_x - 1, size_y - 1]
|