Spaces:
Running
Running
File size: 4,138 Bytes
44d88a1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
# Copied from https://github.com/fahadh4ilyas/syllable_splitter
# MIT License
import re
class SyllableSplitter:
    """Rule-based splitter that cuts text into syllable-like chunks.

    The text is first tokenised into "letters" (single characters or
    multi-character consonant units such as ``"ng"``), each tagged as a
    consonant (``c``), a vowel (``v``) or a separator (``s``, anything
    else). Syllable breaks are then placed by pattern rules over that
    tag string.

    Matching is case-sensitive: the letter sets below hold lowercase
    entries only, so uppercase characters are tagged as separators.
    NOTE(review): the digraph inventory (ng, ny, sy, kh, ...) looks
    aimed at Indonesian/Malay text -- confirm against upstream.
    """

    # Break rules, applied in order to the c/v/s tag string. Each rule
    # matches the text that ends a syllable and uses a lookahead for what
    # must follow, so one substitution pass finds every break for the rule:
    #   vc|c   vowel + consonant cluster: the first consonant closes the syllable
    #   v|v    adjacent vowels are separated
    #   v|cv   a single consonant between vowels opens the next syllable
    #   x|s    a separator is cut off from whatever precedes it ...
    #   s|x    ... and from whatever follows it
    _BREAK_RULES = tuple(
        re.compile(pattern)
        for pattern in (
            r"vc(?=c)",
            r"v(?=v)",
            r"v(?=cv)",
            r"[cvs](?=s)",
            r"s(?=[cvs])",
        )
    )
    # Replacement that re-emits the matched text followed by a break mark.
    _INSERT_BREAK = r"\g<0>|"

    def __init__(self):
        # Units tagged "c": single consonants plus two-character digraphs
        # that are consumed as one letter.
        self.consonant = {
            "b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n",
            "p", "q", "r", "s", "t", "v", "w", "x", "y", "z",
            "ng", "ny", "sy", "ch", "dh", "gh", "kh", "ph", "sh", "th",
        }
        # Clusters kept together as one unit unless a vowel follows them.
        self.double_consonant = {"ll", "ks", "rs", "rt", "nk", "nd"}
        # Units tagged "v".
        self.vocal = {"a", "e", "ê", "é", "è", "i", "o", "u"}

    def split_letters(self, string):
        """Tokenise *string* into letter units and their c/v/s tags.

        Args:
            string: Text to tokenise.

        Returns:
            A tuple ``(letters, arrange)``: ``letters`` is the list of
            units in order, and ``arrange`` is a string with exactly one
            tag character (``c``, ``v`` or ``s``) per unit.
        """
        letters = []
        tags = []
        pos = 0
        length = len(string)
        while pos < length:
            # Two-character window; only one character wide at the very end.
            pair = string[pos:pos + 2]
            if pair in self.double_consonant:
                vowel_follows = (
                    pos + 2 < length and string[pos + 2] in self.vocal
                )
                # Before a vowel only the first consonant is taken, so the
                # second one is tokenised on its own on the next step.
                unit = pair[0] if vowel_follows else pair
                tag = "c"
            elif pair in self.consonant:
                unit, tag = pair, "c"
            elif pair in self.vocal:
                unit, tag = pair, "v"
            else:
                unit = string[pos]
                if unit in self.consonant:
                    tag = "c"
                elif unit in self.vocal:
                    tag = "v"
                else:
                    tag = "s"
            letters.append(unit)
            tags.append(tag)
            pos += len(unit)
        return letters, "".join(tags)

    def split_syllables_from_letters(self, letters, arrange):
        """Group tokenised letters into syllables.

        Break marks are inserted into the tag string only, and ``letters``
        is then sliced by segment length. The letters themselves are never
        scanned for a delimiter, so a literal ``"|"`` in the text is kept
        as an ordinary separator unit.

        Args:
            letters: Letter units as returned by ``split_letters``.
            arrange: Tag string with one ``c``/``v``/``s`` character per
                entry of ``letters``.

        Returns:
            The list of syllable strings. Empty input yields ``[""]``.
            Any letters beyond the end of ``arrange`` stay in the final
            syllable.
        """
        marked = arrange
        for rule in self._BREAK_RULES:
            marked = rule.sub(self._INSERT_BREAK, marked)

        segments = marked.split("|")
        syllables = []
        start = 0
        for segment in segments[:-1]:
            stop = start + len(segment)
            syllables.append("".join(letters[start:stop]))
            start = stop
        # The last syllable takes every remaining letter.
        syllables.append("".join(letters[start:]))
        return syllables

    def split_syllables(self, string):
        """Split *string* into a list of syllable strings.

        Args:
            string: Text to split.

        Returns:
            The syllables in order; separator characters (anything that is
            neither a known consonant nor a known vowel) come back as
            their own single-character entries.
        """
        letters, arrange = self.split_letters(string)
        return self.split_syllables_from_letters(letters, arrange)
|