# Provenance: uploaded by AkitoP (commit a2afacf).
import sys
# sys.path.append("/data/docker/liujing04/gpt-vits/mq-vits-s1bert_no_bert-kv_cache-new_text-emo")
sys.path.append("/data/docker/liujing04/gpt-vits/mq-vits-s1bert_no_bert-kv_cache-new_text-emo2v2")
import re
import cn2an
from pyjyutping import jyutping
from text.symbols import punctuation
def normalizer(x):
    """Return *x* after ``cn2an.transform(x, "an2cn")``.

    Per cn2an's documentation the ``"an2cn"`` mode rewrites Arabic numerals
    found in the sentence as Chinese numerals.

    Written as a ``def`` instead of a lambda bound to a name so tracebacks
    and ``repr()`` show ``normalizer`` rather than ``<lambda>``.
    """
    return cn2an.transform(x, "an2cn")
# Prefix table scanned IN ORDER by jyuping_to_initials_finals_tones(): the
# first entry that a toneless syllable starts with becomes its leading phone,
# so the order below is significant.
#
# NOTE(review): because the scan stops at the first hit, "aai"/"aak"/"aap"/
# "aat"/"aau" can never win (they all start with the earlier "aa"), and the
# same holds for "sp"/"spl"/"spn"/"sil" versus the earlier "s".  Left as-is
# because reordering would change the emitted phones.
INITIALS = [
    # a-prefixes
    "aa", "aai", "aak", "aap", "aat", "aau",
    "ai", "au", "ap", "at", "ak", "a",
    # consonant prefixes (and "e")
    "p", "b", "e",
    "ts", "t", "dz", "d",
    "kw", "k", "gw", "g",
    "f", "h", "l", "m", "ng", "n",
    "s", "y", "w", "c", "z", "j",
    # o-/u-prefixes
    "ong", "on", "ou", "oi", "ok", "o",
    "uk", "ung",
    # extra tokens -- these look like pause/silence markers; TODO confirm
    "sp", "spl", "spn", "sil",
]
# Substitutions applied by replace_punctuation(): full-width and typographic
# punctuation is folded onto a small ASCII-like set.
#
# Non-ASCII characters are spelled as \u escapes on purpose.  A copy of this
# table whose literals were mis-decoded (UTF-8 bytes read through a
# single-byte code page) never matches real input, and several distinct keys
# collapse into the same garbled string; escapes are immune to that.
rep_map = {
    "\uff1a": ",",  # FULLWIDTH COLON
    "\uff1b": ",",  # FULLWIDTH SEMICOLON
    "\uff0c": ",",  # FULLWIDTH COMMA
    "\u3002": ".",  # IDEOGRAPHIC FULL STOP
    "\uff01": "!",  # FULLWIDTH EXCLAMATION MARK
    "\uff1f": "?",  # FULLWIDTH QUESTION MARK
    "\n": ".",
    "\u00b7": ",",  # MIDDLE DOT
    "\u3001": ",",  # IDEOGRAPHIC COMMA
    "...": "\u2026",  # three ASCII dots -> HORIZONTAL ELLIPSIS
    "$": ".",
    "\u201c": "'",  # LEFT DOUBLE QUOTATION MARK
    "\u201d": "'",  # RIGHT DOUBLE QUOTATION MARK
    '"': "'",
    "\u2018": "'",  # LEFT SINGLE QUOTATION MARK
    "\u2019": "'",  # RIGHT SINGLE QUOTATION MARK
    "\uff08": "'",  # FULLWIDTH LEFT PARENTHESIS
    "\uff09": "'",  # FULLWIDTH RIGHT PARENTHESIS
    "(": "'",
    ")": "'",
    "\u300a": "'",  # LEFT DOUBLE ANGLE BRACKET
    "\u300b": "'",  # RIGHT DOUBLE ANGLE BRACKET
    "\u3010": "'",  # LEFT BLACK LENTICULAR BRACKET
    "\u3011": "'",  # RIGHT BLACK LENTICULAR BRACKET
    "[": "'",
    "]": "'",
    "\u2014": "-",  # EM DASH
    "\uff5e": "-",  # FULLWIDTH TILDE
    "~": "-",
    "\u300c": "'",  # LEFT CORNER BRACKET
    "\u300d": "'",  # RIGHT CORNER BRACKET
}
def replace_punctuation(text):
    """Fold punctuation via ``rep_map`` and drop everything else non-Chinese.

    Two passes are made over *text*:

    1. every occurrence of a ``rep_map`` key is replaced by its mapped value;
    2. every run of characters that is neither in the range U+4E00-U+9FA5
       nor one of the symbols in ``punctuation`` is deleted.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    mapping_pattern = re.compile("|".join(re.escape(key) for key in rep_map))
    replaced_text = mapping_pattern.sub(
        lambda match: rep_map[match.group()], text
    )

    # Escape each mark before splicing it into the character class: a raw
    # "-", "]", "^" or "\" there would be read as regex syntax (a range, the
    # end of the class, ...) instead of as a literal character to keep.
    kept_marks = "".join(re.escape(mark) for mark in punctuation)
    return re.sub(r"[^\u4e00-\u9fa5" + kept_marks + r"]+", "", replaced_text)
def text_normalize(text):
    """Apply ``normalizer`` to *text*, then ``replace_punctuation``."""
    return replace_punctuation(normalizer(text))
# Hashed copy of the punctuation symbols, used for the "Y" prefix decision.
punctuation_set = set(punctuation)


def jyuping_to_initials_finals_tones(jyuping_syllables):
    """Turn Jyutping tokens into phone strings plus a per-token phone count.

    Each token is handled as follows:

    * a token found in ``punctuation``, or the token ``"_"``, yields one
      phone and contributes ``1`` to ``word2ph``;
    * any other token has a trailing digit (if present) peeled off as its
      tone, is split at the first ``INITIALS`` entry it starts with, and
      yields two phones (leading part, remainder) and ``2`` in ``word2ph``.
      Only the second phone carries the tone digit, and only when the tone
      is non-zero.

    Finally every phone that is not itself in ``punctuation_set`` is
    prefixed with ``"Y"``.  (Original author's note, translated: the prefix
    keeps Cantonese phones from colliding with the Mandarin ones; the
    phones were reworked into "consonant + toned vowel".)

    Args:
        jyuping_syllables: Iterable of token strings.

    Returns:
        A ``(phones, word2ph)`` tuple of lists.
    """
    # (symbol, tone) pairs.  Tone -1 tags a leading part, 0 means "no tone".
    tagged = []
    word2ph = []

    for token in jyuping_syllables:
        if token in punctuation or token == "_":
            tagged.append((token, 0))
            word2ph.append(1)
            continue

        # A trailing digit is the tone; anything else means "toneless".
        try:
            tone, body = int(token[-1]), token[:-1]
        except ValueError:
            tone, body = 0, token

        head = next((cand for cand in INITIALS if body.startswith(cand)), None)
        if head is None:
            # NOTE(review): a token that starts with no INITIALS entry emits
            # nothing at all -- no phone and no word2ph slot.  Confirm that
            # callers tolerate the resulting gap.
            continue

        if body.startswith("nga"):
            # Always cut "nga..." after the two-letter "ng".
            lead, rest = body[:2], body[2:] or body[-1]
        else:
            # If nothing follows the prefix, reuse its last letter.
            lead, rest = head, body[len(head):] or head[-1]

        tagged.extend([(lead, -1), (rest, tone)])
        word2ph.append(2)

    phones = []
    for symbol, tone in tagged:
        phone = symbol if tone in (-1, 0) else symbol + str(tone)
        if phone not in punctuation_set:
            phone = "Y" + phone
        phones.append(phone)

    return phones, word2ph
def get_jyutping(text):
    """Return the tokens of ``jyutping.convert(text)`` as a list.

    Every symbol in ``punctuation`` is padded with a space on both sides
    before the string is split on whitespace, so each punctuation mark ends
    up as a token of its own.
    """
    romanised = jyutping.convert(text)
    for mark in punctuation:
        romanised = romanised.replace(mark, " {} ".format(mark))
    return romanised.split()
def get_bert_feature(text, word2ph):
    """Delegate to ``text.chinese_bert.get_bert_feature(text, word2ph)``.

    ``chinese_bert`` is imported inside the function, so it is only loaded
    when this function is actually called.
    """
    from text import chinese_bert as bert_backend

    features = bert_backend.get_bert_feature(text, word2ph)
    return features
def g2p(text):
    """Convert *text* to ``(phones, word2ph)``.

    Chains ``get_jyutping`` and ``jyuping_to_initials_finals_tones``; tones
    are not returned separately.
    """
    return jyuping_to_initials_finals_tones(get_jyutping(text))
if __name__ == "__main__":
    # Smoke test: normalise one Cantonese sentence and print its phones.
    #
    # The sample (it reads "佢個鋤頭太短啦。") is spelled with \u escapes so it
    # survives a lossy re-encoding of this file.  A mis-decoded sample has no
    # characters in the CJK range, so text_normalize() strips its syllables
    # and the demo exercises nothing.
    text = "\u4f62\u500b\u92e4\u982d\u592a\u77ed\u5566\u3002"
    text = text_normalize(text)
    phones, word2ph = g2p(text)
    print(phones, word2ph)