Spaces:
Paused
Paused
Amamrnaf
committed on
Commit
·
f023da7
1
Parent(s):
c45f1ab
changes
Browse files
- coqui_tts.py +22 -3
- metaVoice.py +30 -0
coqui_tts.py
CHANGED
@@ -11,13 +11,32 @@ import shutil
|
|
11 |
import pyloudnorm as pyln
|
12 |
import torch
|
13 |
from TTS.api import TTS
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
|
|
|
15 |
|
16 |
|
17 |
-
def run_audio_generation_v1(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
gpu = True if torch.cuda.is_available() else False
|
19 |
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu) # gpu should be true when server (cuda)
|
20 |
-
|
|
|
21 |
# pre-process story audio file
|
22 |
# convert to 16 bit mono
|
23 |
# remove noise
|
@@ -26,7 +45,7 @@ def run_audio_generation_v1(text,accent='None'):
|
|
26 |
sf.write('./tmp/audio/speaker_wav.wav', speaker_wav_data_no_noise, speaker_wav_rate, subtype='PCM_16')
|
27 |
|
28 |
tts.tts_to_file(
|
29 |
-
|
30 |
speaker_wav="./tmp/audio/speaker_wav.wav",
|
31 |
language="en",
|
32 |
file_path="audio/output.wav"
|
|
|
11 |
import pyloudnorm as pyln
|
12 |
import torch
|
13 |
from TTS.api import TTS
|
14 |
+
import string
|
15 |
+
|
16 |
+
def remove_punctuation(sentence):
    """Return *sentence* without ASCII punctuation and without line breaks.

    Every character in ``string.punctuation`` is deleted, then each line
    break is replaced by a single space so the words on either side stay
    separated.

    Args:
        sentence: The text to clean.

    Returns:
        The cleaned text as a new string.
    """
    translator = str.maketrans('', '', string.punctuation)
    sentence = sentence.translate(translator)

    # Remove line breaks. "\r\n" is handled first so a Windows line ending
    # becomes one space rather than two; a lone "\r" is also turned into a
    # space (deleting it outright would glue the neighbouring words together).
    for line_break in ('\r\n', '\n', '\r'):
        sentence = sentence.replace(line_break, ' ')

    return sentence
|
24 |
|
25 |
|
26 |
+
def run_audio_generation_v1(new_text,accent='None'):
|
27 |
+
|
28 |
+
new_text = new_text.replace('\n', ' ').replace('\r', '')
|
29 |
+
new_text_mod = remove_punctuation(new_text)
|
30 |
+
|
31 |
+
new_text_split = new_text_mod.split()
|
32 |
+
for word in new_text_split:
|
33 |
+
if len(word)>=2 and word.isupper():
|
34 |
+
new_text = new_text.replace(word, " ".join([*word]))
|
35 |
+
|
36 |
gpu = True if torch.cuda.is_available() else False
|
37 |
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu) # gpu should be true when server (cuda)
|
38 |
+
# if not gpu:
|
39 |
+
|
40 |
# pre-process story audio file
|
41 |
# convert to 16 bit mono
|
42 |
# remove noise
|
|
|
45 |
sf.write('./tmp/audio/speaker_wav.wav', speaker_wav_data_no_noise, speaker_wav_rate, subtype='PCM_16')
|
46 |
|
47 |
tts.tts_to_file(
|
48 |
+
new_text,
|
49 |
speaker_wav="./tmp/audio/speaker_wav.wav",
|
50 |
language="en",
|
51 |
file_path="audio/output.wav"
|
metaVoice.py
ADDED
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fam.llm.fast_inference import TTS
|
2 |
+
import string
|
3 |
+
import soundfile as sf
|
4 |
+
|
5 |
+
def remove_punctuation(sentence):
    """Return *sentence* without ASCII punctuation and without line breaks.

    Every character in ``string.punctuation`` is deleted, then each line
    break is replaced by a single space so the words on either side stay
    separated.

    Args:
        sentence: The text to clean.

    Returns:
        The cleaned text as a new string.
    """
    translator = str.maketrans('', '', string.punctuation)
    sentence = sentence.translate(translator)

    # Remove line breaks. "\r\n" is handled first so a Windows line ending
    # becomes one space rather than two; a lone "\r" is also turned into a
    # space (deleting it outright would glue the neighbouring words together).
    for line_break in ('\r\n', '\n', '\r'):
        sentence = sentence.replace(line_break, ' ')

    return sentence
|
13 |
+
|
14 |
+
def _spell_out_acronyms(text):
    """Return *text* with each all-caps token of 2+ characters split into letters.

    Tokens are detected on a punctuation-free copy of the text (so "NASA,"
    is recognised as "NASA") and then rewritten in the original text, e.g.
    "NASA" -> "N A S A".
    """
    import re

    candidates = {
        token
        for token in remove_punctuation(text).split()
        if len(token) >= 2 and token.isupper()
    }
    # Longest first, then alphabetical, so the result never depends on set order.
    for token in sorted(candidates, key=lambda t: (-len(t), t)):
        spaced = " ".join(token)
        # Match whole tokens only. A plain str.replace would also rewrite the
        # "US" inside "USA", leaving "U SA" that the later "USA" pass can no
        # longer find.
        pattern = r'(?<!\w)' + re.escape(token) + r'(?!\w)'
        # A callable replacement keeps backslashes in `spaced` literal.
        text = re.sub(pattern, lambda _match, spaced=spaced: spaced, text)
    return text


def run_audio_generation_v2(new_text,accent='None'):
    """Synthesise *new_text* with MetaVoice and store it at ``audio/output.wav``.

    Line breaks in the text are flattened, all-caps tokens are spelled out
    letter by letter, and the speaker reference is read from
    ``./tmp/audio/speaker_wav.wav``.

    Args:
        new_text: The text to speak.
        accent: Accepted for signature compatibility; not used by this function.

    Returns:
        None. The result is written to ``audio/output.wav``.
    """
    import os
    import shutil

    # The model object is constructed on every call, as in the original code.
    tts = TTS()
    new_text = new_text.replace('\n', ' ').replace('\r', '')
    new_text = _spell_out_acronyms(new_text)

    wav_file = tts.synthesise(
        text=new_text,
        spk_ref_path="./tmp/audio/speaker_wav.wav" # you can use any speaker reference file (WAV, OGG, MP3, FLAC, etc.)
    )

    if isinstance(wav_file, (str, os.PathLike)):
        # NOTE(review): upstream MetaVoice documents synthesise() as returning
        # the path of the wav file it generated - confirm against the installed
        # version. A path cannot be written as sample data, so copy the file.
        shutil.copyfile(wav_file, 'audio/output.wav')
    else:
        # Fallback keeps the original behaviour for array-like sample data.
        sf.write('audio/output.wav', wav_file, samplerate=22050)
|
29 |
+
|
30 |
+
|