Spaces:
Runtime error
Runtime error
Commit
·
f43268d
1
Parent(s):
3a2be8a
update app.py
Browse files
app.py
CHANGED
@@ -42,33 +42,19 @@ VOICE_OPTIONS = [
|
|
42 |
"random", # special option for random voice
|
43 |
]
|
44 |
|
45 |
-
|
46 |
def inference(
|
47 |
text,
|
48 |
-
voice
|
49 |
-
seed,
|
50 |
):
|
51 |
if text is None or text.strip() == "":
|
52 |
-
|
53 |
-
text = f.read()
|
54 |
-
if text.strip() == "":
|
55 |
-
raise gr.Error("Please provide either text or script file with content.")
|
56 |
-
|
57 |
-
if split_by_newline == "Yes":
|
58 |
-
texts = list(filter(lambda x: x.strip() != "", text.split("\n")))
|
59 |
-
else:
|
60 |
-
texts = split_and_recombine_text(text)
|
61 |
|
62 |
-
|
63 |
|
64 |
-
|
65 |
-
voice_samples, conditioning_latents = load_voice(voice)
|
66 |
-
else:
|
67 |
-
voice_samples, conditioning_latents = load_voices(voices)
|
68 |
|
69 |
start_time = time.time()
|
70 |
|
71 |
-
# all_parts = []
|
72 |
for j, text in enumerate(texts):
|
73 |
for audio_frame in tts.tts_with_preset(
|
74 |
text,
|
@@ -77,21 +63,21 @@ def inference(
|
|
77 |
preset="ultra_fast",
|
78 |
k=1
|
79 |
):
|
80 |
-
# print("Time taken: ", time.time() - start_time)
|
81 |
-
# all_parts.append(audio_frame)
|
82 |
yield (24000, audio_frame.cpu().detach().numpy())
|
83 |
|
84 |
-
# wav = torch.cat(all_parts, dim=0).unsqueeze(0)
|
85 |
-
# print(wav.shape)
|
86 |
-
# torchaudio.save("output.wav", wav.cpu(), 24000)
|
87 |
-
# yield (None, gr.make_waveform(audio="output.wav",))
|
88 |
def main():
|
89 |
-
title = "Tortoise TTS"
|
90 |
description = """
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
"""
|
92 |
text = gr.Textbox(
|
93 |
lines=4,
|
94 |
-
label="Text
|
95 |
)
|
96 |
|
97 |
voice = gr.Dropdown(
|
@@ -99,12 +85,12 @@ def main():
|
|
99 |
)
|
100 |
|
101 |
output_audio = gr.Audio(label="streaming audio:", streaming=True, autoplay=True)
|
102 |
-
|
103 |
interface = gr.Interface(
|
104 |
fn=inference,
|
105 |
inputs=[
|
106 |
text,
|
107 |
-
voice
|
108 |
],
|
109 |
title=title,
|
110 |
description=description,
|
@@ -112,7 +98,6 @@ def main():
|
|
112 |
)
|
113 |
interface.queue().launch()
|
114 |
|
115 |
-
|
116 |
if __name__ == "__main__":
|
117 |
tts = TextToSpeech(kv_cache=True, use_deepspeed=True, half=True)
|
118 |
|
@@ -121,4 +106,4 @@ if __name__ == "__main__":
|
|
121 |
f"\n\n-------------------------Tortoise TTS Scripts Logs, {datetime.now()}-------------------------\n"
|
122 |
)
|
123 |
|
124 |
-
main()
|
|
|
42 |
"random", # special option for random voice
|
43 |
]
|
44 |
|
|
|
45 |
def inference(
|
46 |
text,
|
47 |
+
voice
|
|
|
48 |
):
|
49 |
if text is None or text.strip() == "":
|
50 |
+
raise gr.Error("Please provide text.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
51 |
|
52 |
+
texts = split_and_recombine_text(text)
|
53 |
|
54 |
+
voice_samples, conditioning_latents = load_voice(voice)
|
|
|
|
|
|
|
55 |
|
56 |
start_time = time.time()
|
57 |
|
|
|
58 |
for j, text in enumerate(texts):
|
59 |
for audio_frame in tts.tts_with_preset(
|
60 |
text,
|
|
|
63 |
preset="ultra_fast",
|
64 |
k=1
|
65 |
):
|
|
|
|
|
66 |
yield (24000, audio_frame.cpu().detach().numpy())
|
67 |
|
|
|
|
|
|
|
|
|
68 |
def main():
|
69 |
+
title = "Tortoise TTS 🐢"
|
70 |
description = """
|
71 |
+
A text-to-speech system which powers lot of organizations in Speech synthesis domain.
|
72 |
+
<br/>
|
73 |
+
A model with strong multi-voice capabilities, highly realistic prosody and intonation.
|
74 |
+
<br/>
|
75 |
+
For faster inference, use the 'ultra_fast' preset and duplicate space if you don't want to wait in a queue.
|
76 |
+
<br/>
|
77 |
"""
|
78 |
text = gr.Textbox(
|
79 |
lines=4,
|
80 |
+
label="Text:",
|
81 |
)
|
82 |
|
83 |
voice = gr.Dropdown(
|
|
|
85 |
)
|
86 |
|
87 |
output_audio = gr.Audio(label="streaming audio:", streaming=True, autoplay=True)
|
88 |
+
|
89 |
interface = gr.Interface(
|
90 |
fn=inference,
|
91 |
inputs=[
|
92 |
text,
|
93 |
+
voice
|
94 |
],
|
95 |
title=title,
|
96 |
description=description,
|
|
|
98 |
)
|
99 |
interface.queue().launch()
|
100 |
|
|
|
101 |
if __name__ == "__main__":
|
102 |
tts = TextToSpeech(kv_cache=True, use_deepspeed=True, half=True)
|
103 |
|
|
|
106 |
f"\n\n-------------------------Tortoise TTS Scripts Logs, {datetime.now()}-------------------------\n"
|
107 |
)
|
108 |
|
109 |
+
main()
|