Replaced Encodec with Vocos
- app.py +2 -1
- utils/g2p/english.py +1 -1
- utils/g2p/japanese.py +1 -2
- utils/g2p/mandarin.py +1 -2
- utils/sentence_cutter.py +1 -13
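The commit title refers to using the Vocos vocoder in place of EnCodec's built-in decoder; in the app.py hunk below, from vocos import Vocos already appears as unchanged context. For reference only, a minimal sketch of the typical vocos API for turning EnCodec token ids into audio, following the vocos README; the checkpoint id, tensor shape, and bandwidth index here are assumptions, not code from this repo:

import torch
from vocos import Vocos

# Pretrained Vocos head that decodes EnCodec features at 24 kHz.
vocos = Vocos.from_pretrained("charactr/vocos-encodec-24khz")

# Dummy EnCodec token ids: 8 codebooks x 200 frames (illustrative shape only).
codes = torch.randint(low=0, high=1024, size=(8, 200))

features = vocos.codes_to_features(codes)                  # token ids -> continuous features
bandwidth_id = torch.tensor([2])                           # index into [1.5, 3, 6, 12] kbps
audio = vocos.decode(features, bandwidth_id=bandwidth_id)  # waveform tensor at 24 kHz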
app.py
CHANGED
@@ -36,7 +36,7 @@ import gradio as gr
from vocos import Vocos
from transformers import WhisperProcessor, WhisperForConditionalGeneration

-
+

torch._C._jit_set_profiling_executor(False)
torch._C._jit_set_profiling_mode(False)
@@ -331,6 +331,7 @@ def infer_long_text(text, preset_prompt, prompt=None, language='auto', accent='n
    fixed-prompt: This mode will keep using the same prompt the user has provided, and generate audio sentence by sentence.
    sliding-window: This mode will use the last sentence as the prompt for the next sentence, but has some concern on speaker maintenance.
    """
+    from utils.sentence_cutter import split_text_into_sentences
    if len(text) > 1000:
        return "Rejected, Text too long (should be less than 1000 characters)", None
    mode = 'fixed-prompt'
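The app.py change above and the g2p changes below all follow the same shape: an import moves from module scope into the one function that uses it, so importing the module no longer loads the dependency at startup and only the first call pays the cost. A minimal sketch of this deferred-import pattern, with purely hypothetical names (not code from this repo):

# lazy_g2p.py -- hypothetical module, for illustration only.
def g2p(text: str) -> list[str]:
    # Deferred ("lazy") import: the dependency is resolved the first time g2p()
    # runs rather than when lazy_g2p is imported. Python caches it in
    # sys.modules, so repeated calls add no extra import cost.
    import re  # stand-in for a heavy package such as pyopenjtalk or pypinyin
    return re.findall(r"[a-z]+", text.lower())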
utils/g2p/english.py
CHANGED
@@ -19,7 +19,6 @@ hyperparameter. Some cleaners are English-specific. You'll typically want to use
import re
from unidecode import unidecode
import inflect
-import eng_to_ipa as ipa
_inflect = inflect.engine()
_comma_number_re = re.compile(r'([0-9][0-9\,]+[0-9])')
_decimal_number_re = re.compile(r'([0-9]+\.[0-9]+)')
@@ -158,6 +157,7 @@ def mark_dark_l(text):


def english_to_ipa(text):
+    import eng_to_ipa as ipa
    text = unidecode(text).lower()
    text = expand_abbreviations(text)
    text = normalize_numbers(text)
utils/g2p/japanese.py
CHANGED
@@ -1,6 +1,5 @@
import re
from unidecode import unidecode
-import pyopenjtalk



@@ -74,7 +73,7 @@ def symbols_to_japanese(text):

def japanese_to_romaji_with_accent(text):
    '''Reference https://r9y9.github.io/ttslearn/latest/notebooks/ch10_Recipe-Tacotron.html'''
-
+    import pyopenjtalk
    text = symbols_to_japanese(text)
    sentences = re.split(_japanese_marks, text)
    marks = re.findall(_japanese_marks, text)
utils/g2p/mandarin.py
CHANGED
@@ -4,7 +4,6 @@ import re
import jieba
import cn2an
import logging
-from pypinyin import lazy_pinyin, BOPOMOFO


# List of (Latin alphabet, bopomofo) pairs:
@@ -241,7 +240,7 @@ def number_to_chinese(text):


def chinese_to_bopomofo(text):
-
+    from pypinyin import lazy_pinyin, BOPOMOFO
    text = text.replace('、', ',').replace(';', ',').replace(':', ',')
    words = jieba.lcut(text, cut_all=False)
    text = ''
utils/sentence_cutter.py
CHANGED
@@ -40,16 +40,4 @@ def split_text_into_sentences(text):

    return sentences

-    raise RuntimeError("It is impossible to reach here.")
-
-long_text = """
-This is a very long paragraph, so most TTS model is unable to handle it. Hence, we have to split it into several sentences. With the help of NLTK, we can split it into sentences. However, the punctuation is not preserved, so we have to add it back. How are we going to do write this code? Let's see.
-"""
-
-long_text = """
-现在我们要来尝试一下中文分句。因为很不幸的是,NLTK不支持中文分句。幸运的是,我们可以使用jieba来分句。但是,jieba分句后,标点符号会丢失,所以我们要手动添加回去。我现在正在想办法把这个例句写的更长更复杂一点,来测试jieba分句的性能。嗯......省略号,感觉不太好,因为省略号不是句号,所以jieba不会把它当作句子的结尾。会这样吗?我们来试试看。
-"""
-
-long_text = """
-これなら、英語と中国語の分句もできる。でも、日本語はどうする?まつわ、ChatGPTに僕と教えてください。ちょーと待ってください。あ、出来た!
-"""
+    raise RuntimeError("It is impossible to reach here.")
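The lines removed above were module-level test data for split_text_into_sentences; the English sample mentions splitting with NLTK and the Chinese sample mentions jieba. As a point of reference only, and not this repo's implementation, English sentence splitting with NLTK looks like this:

import nltk

nltk.download("punkt", quiet=True)  # sentence tokenizer model (newer NLTK may also need "punkt_tab")

text = ("This is a very long paragraph, so most TTS models are unable to handle it. "
        "Hence, we have to split it into several sentences.")
print(nltk.sent_tokenize(text))
# ['This is a very long paragraph, so most TTS models are unable to handle it.',
#  'Hence, we have to split it into several sentences.']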