Spaces:
Running
on
Zero
Running
on
Zero
# Copyright (c) 2024 Amphion. | |
# | |
# This source code is licensed under the MIT license found in the | |
# LICENSE file in the root directory of this source tree. | |
import re | |
import jieba | |
import cn2an | |
from pypinyin import lazy_pinyin, BOPOMOFO | |
from typing import List | |
from diffrhythm.g2p.g2p.chinese_model_g2p import BertPolyPredict | |
from diffrhythm.g2p.utils.front_utils import * | |
import os | |
from huggingface_hub import hf_hub_download | |
# from g2pw import G2PWConverter | |
# Blank-marker ("_") insertion level: {0: "none", 1: "char", 2: "word"}
BLANK_LEVEL = 0
# conv = G2PWConverter(style='pinyin', enable_non_tradional_chinese=True)

# NOTE: every resource path below is relative to the current working
# directory of the process, not to this file.
resource_path = r"./diffrhythm/g2p"


def _require_path(path, description):
    """Abort start-up if a required G2P resource is missing.

    Args:
        path: File or directory that must exist.
        description: Human-readable name of the resource, used in the message.

    Raises:
        SystemExit: If ``path`` does not exist. Raising ``SystemExit`` with a
            message (instead of calling the interactive ``exit()`` helper)
            writes the message to stderr and makes the process exit with a
            non-zero status, so a failed start-up is not reported as success.
    """
    if not os.path.exists(path):
        raise SystemExit(
            "Incorrect path for {}: {}, please check...".format(description, path)
        )


# Lexicon of polyphonic characters (characters that need model disambiguation).
poly_all_class_path = os.path.join(
    resource_path, "sources", "g2p_chinese_model", "polychar.txt"
)
_require_path(poly_all_class_path, "polyphonic character class dictionary")
poly_dict = generate_poly_lexicon(poly_all_class_path)

# Set up G2PW model parameters
g2pw_poly_model_path = os.path.join(resource_path, "sources", "g2p_chinese_model")
_require_path(g2pw_poly_model_path, "g2pw polyphonic character model")

json_file_path = os.path.join(
    resource_path, "sources", "g2p_chinese_model", "polydict.json"
)
_require_path(json_file_path, "g2pw id to pinyin dictionary")

jsonr_file_path = os.path.join(
    resource_path, "sources", "g2p_chinese_model", "polydict_r.json"
)
_require_path(jsonr_file_path, "g2pw pinyin to id dictionary")

# Polyphone predictor used by chinese_to_bopomofo().
g2pw_poly_predict = BertPolyPredict(
    g2pw_poly_model_path, jsonr_file_path, json_file_path
)
"""
Text cleaning
"""
# Substitution table for Latin letters: each entry pairs a case-insensitive
# pattern matching one letter with the bopomofo string that replaces it.
_latin_to_bopomofo = [
    (re.compile(letter, re.IGNORECASE), reading)
    for letter, reading in (
        ("a", "ㄟˉ"),
        ("b", "ㄅㄧˋ"),
        ("c", "ㄙㄧˉ"),
        ("d", "ㄉㄧˋ"),
        ("e", "ㄧˋ"),
        ("f", "ㄝˊㄈㄨˋ"),
        ("g", "ㄐㄧˋ"),
        ("h", "ㄝˇㄑㄩˋ"),
        ("i", "ㄞˋ"),
        ("j", "ㄐㄟˋ"),
        ("k", "ㄎㄟˋ"),
        ("l", "ㄝˊㄛˋ"),
        ("m", "ㄝˊㄇㄨˋ"),
        ("n", "ㄣˉ"),
        ("o", "ㄡˉ"),
        ("p", "ㄆㄧˉ"),
        ("q", "ㄎㄧㄡˉ"),
        ("r", "ㄚˋ"),
        ("s", "ㄝˊㄙˋ"),
        ("t", "ㄊㄧˋ"),
        ("u", "ㄧㄡˉ"),
        ("v", "ㄨㄧˉ"),
        ("w", "ㄉㄚˋㄅㄨˋㄌㄧㄡˋ"),
        ("x", "ㄝˉㄎㄨˋㄙˋ"),
        ("y", "ㄨㄞˋ"),
        ("z", "ㄗㄟˋ"),
    )
]
# Ordered substitution table from bopomofo to IPA. Order matters: the
# two-symbol entries come first so they are consumed before the
# single-symbol entries can match their parts; tone marks come last.
_bopomofo_to_ipa = [
    (re.compile(symbols), ipa)
    for symbols, ipa in (
        # Labial initials followed by ㄛ
        ("ㄅㄛ", "p⁼wo"),
        ("ㄆㄛ", "pʰwo"),
        ("ㄇㄛ", "mwo"),
        ("ㄈㄛ", "fwo"),
        # Two-symbol finals
        ("ㄧㄢ", "|jɛn"),
        ("ㄩㄢ", "|ɥæn"),
        ("ㄧㄣ", "|in"),
        ("ㄩㄣ", "|ɥn"),
        ("ㄧㄥ", "|iŋ"),
        ("ㄨㄥ", "|ʊŋ"),
        ("ㄩㄥ", "|jʊŋ"),
        # Additional two-symbol finals
        ("ㄧㄚ", "|ia"),
        ("ㄧㄝ", "|iɛ"),
        ("ㄧㄠ", "|iɑʊ"),
        ("ㄧㄡ", "|ioʊ"),
        ("ㄧㄤ", "|iɑŋ"),
        ("ㄨㄚ", "|ua"),
        ("ㄨㄛ", "|uo"),
        ("ㄨㄞ", "|uaɪ"),
        ("ㄨㄟ", "|ueɪ"),
        ("ㄨㄢ", "|uan"),
        ("ㄨㄣ", "|uən"),
        ("ㄨㄤ", "|uɑŋ"),
        ("ㄩㄝ", "|ɥɛ"),
        # Single-symbol initials
        ("ㄅ", "p⁼"),
        ("ㄆ", "pʰ"),
        ("ㄇ", "m"),
        ("ㄈ", "f"),
        ("ㄉ", "t⁼"),
        ("ㄊ", "tʰ"),
        ("ㄋ", "n"),
        ("ㄌ", "l"),
        ("ㄍ", "k⁼"),
        ("ㄎ", "kʰ"),
        ("ㄏ", "x"),
        ("ㄐ", "tʃ⁼"),
        ("ㄑ", "tʃʰ"),
        ("ㄒ", "ʃ"),
        ("ㄓ", "ts`⁼"),
        ("ㄔ", "ts`ʰ"),
        ("ㄕ", "s`"),
        ("ㄖ", "ɹ`"),
        ("ㄗ", "ts⁼"),
        ("ㄘ", "tsʰ"),
        ("ㄙ", "|s"),
        # Single-symbol finals
        ("ㄚ", "|a"),
        ("ㄛ", "|o"),
        ("ㄜ", "|ə"),
        ("ㄝ", "|ɛ"),
        ("ㄞ", "|aɪ"),
        ("ㄟ", "|eɪ"),
        ("ㄠ", "|ɑʊ"),
        ("ㄡ", "|oʊ"),
        ("ㄢ", "|an"),
        ("ㄣ", "|ən"),
        ("ㄤ", "|ɑŋ"),
        ("ㄥ", "|əŋ"),
        ("ㄦ", "əɹ"),
        ("ㄧ", "|i"),
        ("ㄨ", "|u"),
        ("ㄩ", "|ɥ"),
        # Tone marks
        ("ˉ", "→|"),
        ("ˊ", "↑|"),
        ("ˇ", "↓↑|"),
        ("ˋ", "↓|"),
        ("˙", "|"),
    )
]
# Words ending in "儿" that er_sandhi() must leave untouched.
must_not_er_words = {"女儿", "老儿", "男儿", "少儿", "小儿"}


def _load_tsv_pairs(path):
    """Read a UTF-8 file of ``left<TAB>right`` lines into a list of pairs.

    Blank lines are skipped; any other line that does not split into exactly
    two tab-separated fields raises ``ValueError``.

    Args:
        path: Path of the tab-separated file.

    Returns:
        A list of ``(left, right)`` tuples in file order.
    """
    pairs = []
    with open(path, "r", encoding="utf-8") as handle:
        for line in handle:
            line = line.strip()
            if not line:
                continue
            left, right = line.split("\t")
            pairs.append((left, right))
    return pairs


# Word-level lexicon (word -> space-separated pinyin), fetched from the Hub.
chinese_lexicon_path = hf_hub_download(
    repo_id="ASLP-lab/DiffRhythm",
    filename="diffrhythm/g2p/sources/chinese_lexicon.txt",
    repo_type="space",
)
word_pinyin_dict = dict(_load_tsv_pairs(chinese_lexicon_path))

# Toneless pinyin syllable -> bopomofo spelling.
pinyin_2_bopomofo_dict = dict(
    _load_tsv_pairs(r"./diffrhythm/g2p/sources/pinyin_2_bpmf.txt")
)

# Pinyin tone digit -> bopomofo tone mark. Tone 1 is left unmarked here.
tone_dict = {
    "0": "˙",
    "5": "˙",
    "1": "",
    "2": "ˊ",
    "3": "ˇ",
    "4": "ˋ",
}

# The file stores "<value>\t<key>" per line, so the columns are swapped when
# building the lookup used by bpmf_to_pinyin().
bopomofos2pinyin_dict = {
    key: value
    for value, key in _load_tsv_pairs(r"./diffrhythm/g2p/sources/bpmf_2_pinyin.txt")
}
def _respell_pinyin(pinyin):
    """Turn one raw symbol-by-symbol transliteration into standard pinyin.

    ``pinyin`` is the non-empty concatenation of per-symbol pieces (for
    example ``"duen4"``); the result applies the usual spelling contractions
    (``"dun4"``). The rules run in a fixed order and each one sees the output
    of the previous ones.
    """
    # A syllable without a trailing tone digit is treated as tone 1.
    if pinyin[-1] not in "01234":
        pinyin += "1"
    tone = pinyin[-1]  # every rule below keeps the final tone digit

    if pinyin[:-1] == "ve":
        pinyin = "y" + pinyin
    if pinyin[:-1] == "sh":
        pinyin = "shi" + tone
    if pinyin[:-1] == "s":
        pinyin = "si" + tone
    if pinyin[:-1] == "c":
        pinyin = "ci" + tone
    if pinyin[:-1] == "i":
        pinyin = "yi" + tone
    if pinyin[:-1] == "iou":
        pinyin = "you" + tone
    if pinyin[:-1] == "ien":
        pinyin = "yin" + tone
    if pinyin[-4:-1] == "iou":
        pinyin = pinyin[:-4] + "iu" + tone

    if pinyin[:-1] == "uei":
        pinyin = "wei" + tone
    elif pinyin[-4:-1] == "uei":
        pinyin = pinyin[:-4] + "ui" + tone

    if pinyin[:-1] == "uen":
        pinyin = "wen" + tone
    elif pinyin[-4:-1] == "uen":
        # Previously this branch compared against "uei" and could never run,
        # so e.g. "duen4" was left uncontracted.
        pinyin = pinyin[:-4] + "un" + tone

    if pinyin[:-1] == "van":
        pinyin = "yuan" + tone
    elif pinyin[-4:-1] == "van":
        pinyin = pinyin[:-4] + "uan" + tone

    if pinyin[-5:-1] == "ueng":
        pinyin = pinyin[:-5] + "ong" + tone
    if pinyin[:-1] == "veng":
        pinyin = "yong" + tone
    if pinyin[-5:-1] == "veng":
        pinyin = pinyin[:-5] + "iong" + tone
    if pinyin[:-1] == "ieng":
        pinyin = "ying" + tone
    if pinyin[:-1] == "u":
        pinyin = "wu" + tone
    if pinyin[:-1] == "v":
        pinyin = "yv" + tone
    if pinyin[:-1] == "ing":
        pinyin = "ying" + tone
    if pinyin[:-1] == "z":
        pinyin = "zi" + tone
    if pinyin[:-1] == "zh":
        pinyin = "zhi" + tone

    # Syllables that still start with a bare medial get the y/w spelling.
    if pinyin[0] == "u":
        pinyin = "w" + pinyin[1:]
    if pinyin[0] == "i":
        pinyin = "y" + pinyin[1:]
    return pinyin.replace("ien", "in")


def bpmf_to_pinyin(text, mapping=None):
    """Convert "|"-separated bopomofo syllables to space-separated pinyin.

    Args:
        text: Bopomofo syllables joined by ``"|"``.
        mapping: Optional dict from a single bopomofo symbol (or tone mark) to
            its pinyin piece. Defaults to the module-level
            ``bopomofos2pinyin_dict``.

    Returns:
        The pinyin syllables joined by single spaces. Symbols missing from
        the mapping are ignored, and segments that yield nothing are skipped.
    """
    if mapping is None:
        mapping = bopomofos2pinyin_dict
    syllables = []
    for segment in text.split("|"):
        raw = "".join(mapping[symbol] for symbol in segment if symbol in mapping)
        if not raw:
            continue
        syllables.append(_respell_pinyin(raw))
    return " ".join(syllables)
# Convert numbers to Chinese pronunciation | |
def number_to_chinese(text):
    """Return ``text`` passed through ``cn2an.transform`` in ``"an2cn"`` mode.

    The whole string is handed to cn2an in one call; numbers are not
    extracted or replaced individually here.
    """
    return cn2an.transform(text, "an2cn")
# Single-character punctuation folding. The full-width forms are written as
# code-point escapes so they cannot be silently collapsed into their ASCII
# look-alikes (which would turn the replacement into a no-op).
_PUNCTUATION_TABLE = str.maketrans(
    {
        "\uff0c": ",",  # full-width comma
        "。": ".",
        "\uff01": "!",  # full-width exclamation mark
        "\uff1f": "?",  # full-width question mark
        "\uff1b": ";",  # full-width semicolon
        "\uff1a": ":",  # full-width colon
        "、": ",",
        "‘": "'",
        "’": "'",
        "⋯": "…",
    }
)


def normalization(text):
    """Normalize punctuation and strip everything that is not Chinese text.

    Steps, in order:
      1. Fold full-width / CJK punctuation into ASCII equivalents.
      2. Collapse three-dot sequences into a single "…".
      3. Remove all whitespace.
      4. Drop every character that is not a CJK ideograph (U+4E00-U+9FFF),
         an underscore, or one of ``, . ? ! ; : ' …``.

    Args:
        text: Input string.

    Returns:
        The cleaned string (possibly empty).
    """
    text = text.translate(_PUNCTUATION_TABLE)
    # Runs after step 1 so that "。。。" (now "...") is also collapsed.
    for dots in ("···", "・・・", "..."):
        text = text.replace(dots, "…")
    text = re.sub(r"\s+", "", text)
    # No whitespace is left at this point, so nothing needs trimming around
    # the punctuation marks afterwards.
    text = re.sub(r"[^\u4e00-\u9fff\s_,\.\?!;:\'…]", "", text)
    return text
def change_tone(bopomofo: str, tone: str) -> str:
    """Return ``bopomofo`` with its tone mark set to ``tone``.

    If the last character is one of ``˙ ˊ ˇ ˋ`` it is replaced; otherwise
    ``tone`` is appended. ``tone`` may be the empty string, which removes the
    mark. ``bopomofo`` must be non-empty.
    """
    ends_with_mark = bopomofo[-1] in "˙ˊˇˋ"
    stem = bopomofo[:-1] if ends_with_mark else bopomofo
    return stem + tone
def er_sandhi(word: str, bopomofos: List[str]) -> List[str]:
    """Give a word-final "儿" the neutral tone mark "˙".

    Applies only to words longer than one character that end in "儿" and are
    not listed in ``must_not_er_words``. ``bopomofos`` is modified in place
    and returned.
    """
    has_er_suffix = len(word) > 1 and word[-1] == "儿"
    if has_er_suffix and word not in must_not_er_words:
        bopomofos[-1] = change_tone(bopomofos[-1], "˙")
    return bopomofos
def bu_sandhi(word: str, bopomofos: List[str]) -> List[str]:
    """Apply tone sandhi for the character "不" to a word's syllables.

    * Words made only of "不", and the word "不字", are left unchanged.
    * In a three-character word with "不" in the middle whose middle
      syllable is spelled "ㄅㄨ" plus one trailing character, that trailing
      character becomes the neutral mark "˙".
    * Otherwise every "不" directly followed by a syllable ending in "ˋ" has
      its last character replaced by "ˊ".

    ``bopomofos`` is modified in place and returned.
    """
    if set(word) == {"不"} or word == "不字":
        return bopomofos

    if len(word) == 3 and word[1] == "不" and bopomofos[1][:-1] == "ㄅㄨ":
        bopomofos[1] = bopomofos[1][:-1] + "˙"
        return bopomofos

    # Only positions that have a following character AND a following
    # syllable can take part in the rule.
    last_checked = min(len(word), len(bopomofos)) - 1
    for idx in range(last_checked):
        if word[idx] == "不" and bopomofos[idx + 1].endswith("ˋ"):
            bopomofos[idx] = bopomofos[idx][:-1] + "ˊ"
    return bopomofos
def yi_sandhi(word: str, bopomofos: List[str]) -> List[str]:
    """Apply tone sandhi for the character "一" to a word's syllables.

    ``bopomofos`` is modified in place and returned. The first matching case
    wins:

    1. The word contains "一" and at least one other numeric character:
       a leading "一" followed by a character that is not a Chinese digit
       gets "ˊ" when the second syllable ends in "ˋ" or "˙", else "ˋ";
       every other "一" has its tone mark removed.
    2. Three-character "X一X": the middle syllable gets the neutral mark "˙".
    3. Words starting with "第一": the tone mark of the second syllable is
       removed.
    4. Words starting with "一月", "一日" or "一号": the tone mark of the
       first syllable is removed.
    5. Otherwise each "一" that has a following character gets "ˊ" when the
       next syllable ends in "ˋ", or "ˋ" when the next character is not in
       ``punc``.
    """
    # Characters treated as punctuation by the fallback rule (case 5).
    punc = ":,;。?!“”‘’':,;.?!()(){}【】[]-~`、 "
    if word.find("一") != -1 and any(
        [item.isnumeric() for item in word if item != "一"]
    ):
        # Case 1: "一" appears together with other numeric characters.
        for i in range(len(word)):
            if (
                i == 0
                and word[0] == "一"
                and len(word) > 1
                and word[1]
                not in [
                    "零",
                    "一",
                    "二",
                    "三",
                    "四",
                    "五",
                    "六",
                    "七",
                    "八",
                    "九",
                    "十",
                ]
            ):
                # NOTE(review): the length guard inspects bopomofos[0] while
                # the test reads bopomofos[1][-1]; an empty or missing second
                # syllable raises IndexError here. Presumably the guard was
                # meant for bopomofos[1] - confirm.
                if len(bopomofos[0]) > 0 and bopomofos[1][-1] in ["ˋ", "˙"]:
                    bopomofos[0] = change_tone(bopomofos[0], "ˊ")
                else:
                    bopomofos[0] = change_tone(bopomofos[0], "ˋ")
            elif word[i] == "一":
                # Any other "一" in a numeric word: strip the tone mark.
                bopomofos[i] = change_tone(bopomofos[i], "")
        return bopomofos
    elif len(word) == 3 and word[1] == "一" and word[0] == word[-1]:
        # Case 2: reduplication "X一X".
        bopomofos[1] = change_tone(bopomofos[1], "˙")
    elif word.startswith("第一"):
        # Case 3: ordinal.
        bopomofos[1] = change_tone(bopomofos[1], "")
    elif word.startswith("一月") or word.startswith("一日") or word.startswith("一号"):
        # Case 4: dates.
        bopomofos[0] = change_tone(bopomofos[0], "")
    else:
        # Case 5: general rule, driven by the tone of the following syllable.
        for i, char in enumerate(word):
            if char == "一" and i + 1 < len(word):
                if (
                    len(bopomofos) > i + 1
                    and len(bopomofos[i + 1]) > 0
                    and bopomofos[i + 1][-1] in {"ˋ"}
                ):
                    bopomofos[i] = change_tone(bopomofos[i], "ˊ")
                else:
                    if word[i + 1] not in punc:
                        bopomofos[i] = change_tone(bopomofos[i], "ˋ")
                    else:
                        pass
    return bopomofos
def merge_bu(seg: List) -> List:
    """Attach each "不" token to the token that follows it.

    ``["我", "不", "要"]`` becomes ``["我", "不要"]``. A "不" that has nothing
    to attach to (it is last, or the next token is another "不") is kept as
    its own token instead of being dropped, so concatenating the result always
    reproduces the concatenation of ``seg``.

    Args:
        seg: List of word strings.

    Returns:
        A new list of word strings.
    """
    merged = []
    pending_bu = False  # True while a "不" is waiting for the next token
    for word in seg:
        if word == "不":
            if pending_bu:
                # The earlier "不" cannot merge with another "不"; keep it.
                merged.append("不")
            pending_bu = True
            continue
        merged.append("不" + word if pending_bu else word)
        pending_bu = False
    if pending_bu:
        merged.append("不")
    return merged
def merge_er(seg: List) -> List:
    """Append every stand-alone "儿" token to the token before it.

    A "儿" in first position has nothing to attach to and is kept as is.
    Returns a new list; ``seg`` is not modified.
    """
    merged = []
    for token in seg:
        if token == "儿" and merged:
            merged[-1] += token
        else:
            merged.append(token)
    return merged
def merge_yi(seg: List) -> List:
    """Merge "一" with its neighbours so tone sandhi sees whole units.

    Two passes are applied:

    1. Reduplication: ``"X", "一", "X"`` becomes the single token ``"X一X"``.
       Every occurrence in the list is merged, not only the first one.
    2. Any remaining stand-alone ``"一"`` is glued onto the token after it
       (``"一", "个"`` becomes ``"一个"``).

    Concatenating the result always reproduces the concatenation of ``seg``.

    Args:
        seg: List of word strings.

    Returns:
        A new list of word strings.
    """
    # Pass 1: reduplication.
    merged = []
    i = 0
    while i < len(seg):
        word = seg[i]
        if (
            word == "一"
            and merged
            and i + 1 < len(seg)
            and seg[i - 1] == seg[i + 1]
            # The previous input token must still be a token of its own in
            # the output (it is not if it was consumed by an earlier merge).
            and merged[-1] == seg[i - 1]
        ):
            merged[-1] = seg[i - 1] + "一" + seg[i + 1]
            i += 2  # the following token is now part of the merged word
        else:
            merged.append(word)
            i += 1

    # Pass 2: attach a lone "一" to the token that follows it.
    result = []
    for word in merged:
        if result and result[-1] == "一":
            result[-1] += word
        else:
            result.append(word)
    return result
# Word Segmentation, and convert Chinese pronunciation to pinyin (bopomofo) | |
def chinese_to_bopomofo(text_short, sentence):
    """Segment Chinese text and spell it as "|"-separated bopomofo syllables.

    The text is segmented with jieba, the segments are regrouped by
    ``merge_yi`` / ``merge_bu`` / ``merge_er``, and each word is spelled from
    the word lexicon when possible, otherwise character by character (using
    the polyphone predictor for characters in ``poly_dict``). Tone sandhi is
    then applied per word. Words without any CJK ideograph are passed through
    unchanged.

    Args:
        text_short: Text to convert.
        sentence: Unused by this function.

    Returns:
        The bopomofo string; syllables and pass-through tokens are separated
        by "|".
    """
    words = jieba.lcut(text_short, cut_all=False)
    words = merge_yi(words)
    words = merge_bu(words)
    words = merge_er(words)
    text = ""
    # Offset of the current word inside text_short; the polyphone predictor
    # is queried by absolute character position.
    char_index = 0
    for word in words:
        bopomofos = []
        if word in word_pinyin_dict and word not in poly_dict:
            # Whole-word lexicon hit.
            pinyin = word_pinyin_dict[word]
            for py in pinyin.split(" "):
                if py[:-1] in pinyin_2_bopomofo_dict and py[-1] in tone_dict:
                    bopomofos.append(
                        pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
                    )
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
                else:
                    # Syllable unknown to the pinyin table: fall back to
                    # pypinyin for the word.
                    bopomofos_lazy = lazy_pinyin(word, BOPOMOFO)
                    bopomofos += bopomofos_lazy
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
        else:
            # Character-by-character spelling.
            for i, c in enumerate(word):
                if c in poly_dict:
                    poly_pinyin = g2pw_poly_predict.predict_process(
                        [text_short, char_index + i]
                    )[0]
                    # Drop the two leading and one trailing wrapper characters
                    # of the predictor output.
                    py = poly_pinyin[2:-1]
                    bopomofos.append(
                        pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
                    )
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
                elif c in word_pinyin_dict:
                    py = word_pinyin_dict[c]
                    bopomofos.append(
                        pinyin_2_bopomofo_dict[py[:-1]] + tone_dict[py[-1]]
                    )
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
                else:
                    # Unknown character (e.g. punctuation): keep it verbatim.
                    bopomofos.append(c)
                    if BLANK_LEVEL == 1:
                        bopomofos.append("_")
        if BLANK_LEVEL == 2:
            bopomofos.append("_")
        char_index += len(word)

        # Third-tone sandhi. The leading third tones are REPLACED by "ˊ";
        # appending would leave two tone marks on one syllable.
        if (
            len(word) == 3
            and bopomofos[0][-1] == "ˇ"
            and bopomofos[1][-1] == "ˇ"
            and bopomofos[-1][-1] == "ˇ"
        ):
            bopomofos[0] = bopomofos[0][:-1] + "ˊ"
            bopomofos[1] = bopomofos[1][:-1] + "ˊ"
        if len(word) == 2 and bopomofos[0][-1] == "ˇ" and bopomofos[-1][-1] == "ˇ":
            bopomofos[0] = bopomofos[0][:-1] + "ˊ"

        bopomofos = bu_sandhi(word, bopomofos)
        bopomofos = yi_sandhi(word, bopomofos)
        bopomofos = er_sandhi(word, bopomofos)

        # Tokens without any CJK ideograph are emitted as they are.
        if not re.search("[\u4e00-\u9fff]", word):
            text += "|" + word
            continue

        # A syllable that still ends in a bopomofo letter has no tone mark:
        # mark it explicitly as first tone.
        for i in range(len(bopomofos)):
            bopomofos[i] = re.sub(r"([\u3105-\u3129])$", r"\1ˉ", bopomofos[i])
        if text != "":
            text += "|"
        text += "|".join(bopomofos)
    return text
# Convert latin pronunciation to pinyin (bopomofo) | |
def latin_to_bopomofo(text):
    """Replace Latin letters in ``text`` using the ``_latin_to_bopomofo`` table.

    The table entries are applied one after another, each over the result of
    the previous substitution.
    """
    for pattern, reading in _latin_to_bopomofo:
        text = pattern.sub(reading, text)
    return text
# Convert pinyin (bopomofo) to IPA | |
def bopomofo_to_ipa(text):
    """Rewrite bopomofo symbols in ``text`` using the ``_bopomofo_to_ipa`` table.

    Entries are applied in table order, each over the result of the previous
    substitution.
    """
    for pattern, ipa in _bopomofo_to_ipa:
        text = pattern.sub(ipa, text)
    return text
def _chinese_to_ipa(text, sentence):
    """Run the full pipeline on one string: numbers, cleanup, bopomofo, IPA.

    Args:
        text: Input string.
        sentence: Passed through to ``chinese_to_bopomofo``.

    Returns:
        The phoneme string with "|" as separator and no trailing "|".
    """
    cleaned = normalization(number_to_chinese(text.strip()))
    bopomofo = latin_to_bopomofo(chinese_to_bopomofo(cleaned, sentence))
    ipa = bopomofo_to_ipa(bopomofo)
    # pinyin = bpmf_to_pinyin(text)

    # An s/ɹ-type initial directly followed by tone arrows (or the end of the
    # string) has no vowel; insert "ɹ" after it.
    ipa = re.sub("([sɹ]`[⁼ʰ]?)([→↓↑ ]+|$)", r"\1ɹ\2", ipa)
    ipa = re.sub("([s][⁼ʰ]?)([→↓↑ ]+|$)", r"\1ɹ\2", ipa)
    # Drop a leading "|" and every character outside the allowed symbol set.
    ipa = re.sub(r"^\||[^\w\s_,\.\?!;:\'…\|→↓↑⁼ʰ`]", "", ipa)
    # Put separators around punctuation, then collapse repeated separators.
    ipa = re.sub(r"([,\.\?!;:\'…])", r"|\1|", ipa)
    ipa = re.sub(r"\|+", "|", ipa)
    return ipa.rstrip("|")
# Convert Chinese to IPA | |
def chinese_to_ipa(text, sentence, text_tokenizer):
    """Convert one string, or an iterable of strings, to IPA phoneme strings.

    Args:
        text: A ``str`` (including ``str`` subclasses) or an iterable of
            strings.
        sentence: Passed through to ``_chinese_to_ipa``.
        text_tokenizer: Unused by this function.

    Returns:
        A single phoneme string when ``text`` is a string, otherwise a list
        with one phoneme string per input item.
    """
    # isinstance (rather than an exact type comparison) keeps str subclasses
    # from being iterated character by character.
    if isinstance(text, str):
        return _chinese_to_ipa(text, sentence)
    return [_chinese_to_ipa(item, sentence) for item in text]