AkitoP committed on
Commit
baa9f02
1 Parent(s): 2372084

Update GPT_SoVITS/text/cleaner.py

Browse files
Files changed (1) hide show
  1. GPT_SoVITS/text/cleaner.py +73 -75
GPT_SoVITS/text/cleaner.py CHANGED
@@ -1,75 +1,73 @@
1
- from text import japanese, cleaned_text_to_sequence, english,korean,cantonese
2
- print(japanese.__file__)
3
- import os
4
- if os.environ.get("version","v1")=="v1":
5
- from text import chinese
6
- from text.symbols import symbols
7
- else:
8
- from text import chinese2 as chinese
9
- from text.symbols2 import symbols
10
- print("THIS IS IN CLEANER.py")
11
- language_module_map = {"zh": chinese, "ja": japanese, "en": english, "ko": korean,"yue":cantonese}
12
- special = [
13
- # ("%", "zh", "SP"),
14
- ("¥", "zh", "SP2"),
15
- ("^", "zh", "SP3"),
16
- # ('@', 'zh', "SP4")#不搞鬼畜了,和第二版保持一致吧
17
- ]
18
-
19
-
20
- def clean_text(text, language):
21
- print('this is clean_text')
22
- if(language not in language_module_map):
23
- language="en"
24
- text=" "
25
- for special_s, special_l, target_symbol in special:
26
- if special_s in text and language == special_l:
27
- return clean_special(text, language, special_s, target_symbol)
28
- language_module = language_module_map[language]
29
- if hasattr(language_module,"text_normalize"):
30
- norm_text = language_module.text_normalize(text)
31
- else:
32
- norm_text=text
33
- if language == "zh" or language=="yue":##########
34
- phones, word2ph = language_module.g2p(norm_text)
35
- assert len(phones) == sum(word2ph)
36
- assert len(norm_text) == len(word2ph)
37
- elif language == "en":
38
- phones = language_module.g2p(norm_text)
39
- if len(phones) < 4:
40
- phones = [','] * (4 - len(phones)) + phones
41
- word2ph = None
42
- else:
43
- phones = language_module.g2p(norm_text)
44
- word2ph = None
45
-
46
- for ph in phones:
47
- assert ph in symbols, ph
48
- return phones, word2ph, norm_text
49
-
50
-
51
- def clean_special(text, language, special_s, target_symbol):
52
- """
53
- 特殊静音段sp符号处理
54
- """
55
- text = text.replace(special_s, ",")
56
- language_module = language_module_map[language]
57
- norm_text = language_module.text_normalize(text)
58
- phones = language_module.g2p(norm_text)
59
- new_ph = []
60
- for ph in phones[0]:
61
- assert ph in symbols
62
- if ph == ",":
63
- new_ph.append(target_symbol)
64
- else:
65
- new_ph.append(ph)
66
- return new_ph, phones[1], norm_text
67
-
68
-
69
- def text_to_sequence(text, language):
70
- phones = clean_text(text)
71
- return cleaned_text_to_sequence(phones)
72
-
73
-
74
- if __name__ == "__main__":
75
- print(clean_text("你好%啊啊啊额、还是到付红四方。", "zh"))
 
1
+ from text import japanese, cleaned_text_to_sequence
2
+ print(japanese.__file__)
3
+ import os
4
+ if os.environ.get("version","v1")=="v1":
5
+ from text.symbols import symbols
6
+ else:
7
+ from text.symbols2 import symbols
8
+ print("THIS IS IN CLEANER.py")
9
+ language_module_map = { "ja": japanese}
10
+ special = [
11
+ # ("%", "zh", "SP"),
12
+ ("¥", "zh", "SP2"),
13
+ ("^", "zh", "SP3"),
14
+ # ('@', 'zh', "SP4")#不搞鬼畜了,和第二版保持一致吧
15
+ ]
16
+
17
+
18
+ def clean_text(text, language):
19
+ print('this is clean_text')
20
+ if(language not in language_module_map):
21
+ language="en"
22
+ text=" "
23
+ for special_s, special_l, target_symbol in special:
24
+ if special_s in text and language == special_l:
25
+ return clean_special(text, language, special_s, target_symbol)
26
+ language_module = language_module_map[language]
27
+ if hasattr(language_module,"text_normalize"):
28
+ norm_text = language_module.text_normalize(text)
29
+ else:
30
+ norm_text=text
31
+ if language == "zh" or language=="yue":##########
32
+ phones, word2ph = language_module.g2p(norm_text)
33
+ assert len(phones) == sum(word2ph)
34
+ assert len(norm_text) == len(word2ph)
35
+ elif language == "en":
36
+ phones = language_module.g2p(norm_text)
37
+ if len(phones) < 4:
38
+ phones = [','] * (4 - len(phones)) + phones
39
+ word2ph = None
40
+ else:
41
+ phones = language_module.g2p(norm_text)
42
+ word2ph = None
43
+
44
+ for ph in phones:
45
+ assert ph in symbols, ph
46
+ return phones, word2ph, norm_text
47
+
48
+
49
+ def clean_special(text, language, special_s, target_symbol):
50
+ """
51
+ 特殊静音段sp符号处理
52
+ """
53
+ text = text.replace(special_s, ",")
54
+ language_module = language_module_map[language]
55
+ norm_text = language_module.text_normalize(text)
56
+ phones = language_module.g2p(norm_text)
57
+ new_ph = []
58
+ for ph in phones[0]:
59
+ assert ph in symbols
60
+ if ph == ",":
61
+ new_ph.append(target_symbol)
62
+ else:
63
+ new_ph.append(ph)
64
+ return new_ph, phones[1], norm_text
65
+
66
+
67
+ def text_to_sequence(text, language):
68
+ phones = clean_text(text)
69
+ return cleaned_text_to_sequence(phones)
70
+
71
+
72
+ if __name__ == "__main__":
73
+ print(clean_text("你好%啊啊啊额、还是到付红四方。", "zh"))