File size: 4,138 Bytes
44d88a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# Copied from https://github.com/fahadh4ilyas/syllable_splitter
# MIT License
import re


class SyllableSplitter:
    """Rule-based syllable splitter driven by consonant/vowel patterns.

    A word is first tokenised into "letters" (single characters or
    multi-character consonants such as ``ng``/``ny``/``kh``), each tagged as
    consonant (``c``), vowel (``v``) or separator (``s``, anything else).
    Syllable breaks are then placed purely from the resulting tag string.

    Classification is case-insensitive; the emitted text keeps the caller's
    original casing.
    """

    # Zero-width pattern matching every position in a tag string where a
    # syllable break belongs.  The lookbehind is the tag(s) left of the break,
    # the lookahead the tag(s) right of it.
    _BOUNDARY = re.compile(
        r"(?<=vc)(?=c)"  # v c | c…  : split a consonant cluster after a vowel
        r"|(?<=v)(?=v|cv)"  # v | v  and  v | c v
        r"|(?<=[cvs])(?=s)"  # always break before a separator
        r"|(?<=s)(?=[cvs])"  # always break after a separator
    )

    def __init__(self):
        # Tokens treated as a single consonant, including two-character ones.
        self.consonant = {
            "b", "c", "d", "f", "g", "h", "j", "k", "l", "m", "n",
            "p", "q", "r", "s", "t", "v", "w", "x", "y", "z",
            "ng", "ny", "sy", "ch", "dh", "gh", "kh", "ph", "sh", "th",
        }
        # Pairs kept together as one consonant token unless a vowel follows
        # (see split_letters).
        self.double_consonant = {"ll", "ks", "rs", "rt", "nk", "nd"}
        self.vocal = {"a", "e", "ê", "é", "è", "i", "o", "u"}

    def split_letters(self, string):
        """Tokenise *string* into letters and classify each one.

        Args:
            string: Text to tokenise.

        Returns:
            A tuple ``(letters, arrange)`` where ``letters`` is the list of
            tokens (original casing preserved) and ``arrange`` is a string of
            the same length holding ``"c"``, ``"v"`` or ``"s"`` per token.
        """
        letters = []
        kinds = []
        pos = 0
        length = len(string)

        # Walk by index instead of repeatedly re-slicing the remaining text.
        while pos < length:
            pair = string[pos : pos + 2]
            key = pair.lower()

            if key in self.double_consonant:
                # Before a vowel the pair is split: only its first character
                # is consumed here and the second is re-examined next round.
                following = string[pos + 2 : pos + 3].lower()
                take = 1 if following in self.vocal else 2
                kind = "c"
            elif key in self.consonant:
                # len(pair) is 1 when this is the final character.
                take, kind = len(pair), "c"
            elif key in self.vocal:
                take, kind = len(pair), "v"
            else:
                # The pair is not a known token: classify one character.
                single = string[pos].lower()
                take = 1
                if single in self.consonant:
                    kind = "c"
                elif single in self.vocal:
                    kind = "v"
                else:
                    kind = "s"

            letters.append(string[pos : pos + take])
            kinds.append(kind)
            pos += take

        return letters, "".join(kinds)

    def split_syllables_from_letters(self, letters, arrange):
        """Group classified letters into syllables.

        Args:
            letters: Tokens as produced by :meth:`split_letters`.
            arrange: Tag string with one ``c``/``v``/``s`` per token.

        Returns:
            List of syllable strings; each separator token is returned as its
            own element.  Empty input yields ``[""]``.
        """
        # Breaks are tracked as indices rather than by inserting a marker
        # character, so a literal "|" in the text cannot be mistaken for one.
        cuts = [match.start() for match in self._BOUNDARY.finditer(arrange)]
        bounds = [0, *cuts, len(letters)]
        return [
            "".join(letters[start:stop])
            for start, stop in zip(bounds, bounds[1:])
        ]

    def split_syllables(self, string):
        """Split *string* into a list of syllables."""
        letters, arrange = self.split_letters(string)
        return self.split_syllables_from_letters(letters, arrange)