Amamrnaf committed on
Commit
f023da7
·
1 Parent(s): c45f1ab
Files changed (2) hide show
  1. coqui_tts.py +22 -3
  2. metaVoice.py +30 -0
coqui_tts.py CHANGED
@@ -11,13 +11,32 @@ import shutil
11
  import pyloudnorm as pyln
12
  import torch
13
  from TTS.api import TTS
 
 
 
 
 
 
 
 
14
 
 
15
 
16
 
17
- def run_audio_generation_v1(text,accent='None'):
 
 
 
 
 
 
 
 
 
18
  gpu = True if torch.cuda.is_available() else False
19
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu) # gpu should be true when server (cuda)
20
-
 
21
  # pre-process story audio file
22
  # convert to 16 bit mono
23
  # remove noise
@@ -26,7 +45,7 @@ def run_audio_generation_v1(text,accent='None'):
26
  sf.write('./tmp/audio/speaker_wav.wav', speaker_wav_data_no_noise, speaker_wav_rate, subtype='PCM_16')
27
 
28
  tts.tts_to_file(
29
- text,
30
  speaker_wav="./tmp/audio/speaker_wav.wav",
31
  language="en",
32
  file_path="audio/output.wav"
 
11
  import pyloudnorm as pyln
12
  import torch
13
  from TTS.api import TTS
14
+ import string
15
+
16
+ def remove_punctuation(sentence):
17
+ translator = str.maketrans('', '', string.punctuation)
18
+ sentence = sentence.translate(translator)
19
+
20
+ # Remove line breaks
21
+ sentence = sentence.replace('\n', ' ').replace('\r', '')
22
 
23
+ return sentence
24
 
25
 
26
+ def run_audio_generation_v1(new_text,accent='None'):
27
+
28
+ new_text = new_text.replace('\n', ' ').replace('\r', '')
29
+ new_text_mod = remove_punctuation(new_text)
30
+
31
+ new_text_split = new_text_mod.split()
32
+ for word in new_text_split:
33
+ if len(word)>=2 and word.isupper():
34
+ new_text = new_text.replace(word, " ".join([*word]))
35
+
36
  gpu = True if torch.cuda.is_available() else False
37
  tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu) # gpu should be true when server (cuda)
38
+ # if not gpu:
39
+
40
  # pre-process story audio file
41
  # convert to 16 bit mono
42
  # remove noise
 
45
  sf.write('./tmp/audio/speaker_wav.wav', speaker_wav_data_no_noise, speaker_wav_rate, subtype='PCM_16')
46
 
47
  tts.tts_to_file(
48
+ new_text,
49
  speaker_wav="./tmp/audio/speaker_wav.wav",
50
  language="en",
51
  file_path="audio/output.wav"
metaVoice.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fam.llm.fast_inference import TTS
2
+ import string
3
+ import soundfile as sf
4
+
5
def remove_punctuation(sentence):
    """Return *sentence* without ASCII punctuation and with line breaks flattened.

    Every character listed in ``string.punctuation`` is deleted, then each
    line break (CRLF, LF or a lone CR) is replaced by a single space.

    Args:
        sentence: The text to clean.

    Returns:
        The cleaned text as a new string.
    """
    translator = str.maketrans('', '', string.punctuation)
    sentence = sentence.translate(translator)

    # Handle CRLF first so a Windows line ending collapses to ONE space, then
    # map any remaining LF / lone CR to a space as well.  Deleting a lone CR
    # outright would glue the words on either side of it together.
    return sentence.replace('\r\n', ' ').replace('\n', ' ').replace('\r', ' ')
13
+
14
def run_audio_generation_v2(new_text, accent='None'):
    """Synthesise *new_text* with the MetaVoice ``TTS`` model into ``audio/output.wav``.

    Before synthesis the text is prepared in two steps:

    1. Line breaks (CRLF, LF or a lone CR) are replaced by spaces.
    2. Every whitespace-delimited word that, once punctuation is stripped, is
       at least two characters long and all uppercase is spelled out letter
       by letter (``"NASA"`` -> ``"N A S A"``).

    ``./tmp/audio/speaker_wav.wav`` is passed to the model as the speaker
    reference.

    Args:
        new_text: The text to speak.
        accent: Not used by this function.
    """
    import os
    import re
    import shutil

    tts = TTS()

    # CRLF first so it collapses to one space; a lone CR also becomes a space
    # rather than being deleted, which would merge the neighbouring words.
    new_text = new_text.replace('\r\n', ' ').replace('\n', ' ').replace('\r', ' ')

    # Collect the distinct all-caps words once (punctuation stripped so that
    # "NASA," is still recognised as "NASA").
    acronyms = {
        word
        for word in remove_punctuation(new_text).split()
        if len(word) >= 2 and word.isupper()
    }

    if acronyms:
        # One pass with alphanumeric boundaries on both sides.  Replacing word
        # by word with str.replace would also rewrite substrings of other
        # words ("US" inside "USA" -> "U SA") and already-expanded text.
        # Longest alternatives first so the regex prefers the full word.
        alternatives = "|".join(
            re.escape(word) for word in sorted(acronyms, key=len, reverse=True)
        )
        pattern = re.compile(r"(?<![^\W_])(?:" + alternatives + r")(?![^\W_])")
        new_text = pattern.sub(lambda match: " ".join(match.group()), new_text)

    wav_file = tts.synthesise(
        text=new_text,
        spk_ref_path="./tmp/audio/speaker_wav.wav"  # you can use any speaker reference file (WAV, OGG, MP3, FLAC, etc.)
    )

    # NOTE(review): MetaVoice's synthesise() appears to return the PATH of the
    # generated wav file rather than sample data - confirm against the
    # installed version.  A path cannot be written with sf.write, so copy the
    # file instead; any other result is written as samples, as before.
    if isinstance(wav_file, (str, os.PathLike)):
        shutil.copyfile(wav_file, 'audio/output.wav')
    else:
        sf.write('audio/output.wav', wav_file, samplerate=22050)
29
+
30
+