Spaces:
Sleeping
Sleeping
Commit
·
1700872
1
Parent(s):
6f5cea7
description
Browse files
app.py
CHANGED
@@ -57,8 +57,8 @@ examples = [
|
|
57 |
|
58 |
jenny_examples = [
|
59 |
[
|
60 |
-
"Remember
|
61 |
-
"Jenny speaks at
|
62 |
2.5,
|
63 |
],
|
64 |
[
|
@@ -73,7 +73,7 @@ jenny_examples = [
|
|
73 |
],
|
74 |
[
|
75 |
"Montrose also, after having experienced still more variety of good and bad fortune, threw down his arms, and retired out of the kingdom.",
|
76 |
-
"Jenny delivers words at a fast pace and an animated tone, in a very spacious environment, accompanied by noticeable background noise.",
|
77 |
2.5,
|
78 |
],
|
79 |
]
|
@@ -323,15 +323,18 @@ with gr.Blocks(css=css) as block:
|
|
323 |
gr.HTML(
|
324 |
f"""
|
325 |
<p><a href="https://github.com/huggingface/parler-tts"> Parler-TTS</a> is a training and inference library for
|
326 |
-
high-fidelity text-to-speech (TTS) models.
|
327 |
-
is the first iteration model trained using 10k hours of narrated audiobooks
|
328 |
-
|
|
|
|
|
329 |
|
330 |
<p>Tips for ensuring good generation:
|
331 |
<ul>
|
332 |
<li>Include the term "very clear audio" to generate the highest quality audio, and "very noisy audio" for high levels of background noise</li>
|
333 |
<li>Punctuation can be used to control the prosody of the generations, e.g. use commas to add small breaks in speech</li>
|
334 |
<li>The remaining speech features (gender, speaking rate, pitch and reverberation) can be controlled directly through the prompt</li>
|
|
|
335 |
</ul>
|
336 |
</p>
|
337 |
"""
|
|
|
57 |
|
58 |
jenny_examples = [
|
59 |
[
|
60 |
+
"Remember, this is only the first iteration of the model! To improve the prosody and naturalness of the speech further, we're scaling up the amount of training data by a factor of five times.",
|
61 |
+
"Jenny speaks at an average pace with a slightly animated delivery in a very confined sounding environment with clear audio quality.",
|
62 |
2.5,
|
63 |
],
|
64 |
[
|
|
|
73 |
],
|
74 |
[
|
75 |
"Montrose also, after having experienced still more variety of good and bad fortune, threw down his arms, and retired out of the kingdom.",
|
76 |
+
"Jenny delivers her words at a fast pace and an animated tone, in a very spacious environment, accompanied by noticeable background noise.",
|
77 |
2.5,
|
78 |
],
|
79 |
]
|
|
|
323 |
gr.HTML(
|
324 |
f"""
|
325 |
<p><a href="https://github.com/huggingface/parler-tts"> Parler-TTS</a> is a training and inference library for
|
326 |
+
high-fidelity text-to-speech (TTS) models. Two models are demonstrated here, <a href="https://huggingface.co/parler-tts/parler_tts_mini_v0.1"> Parler-TTS Mini v0.1</a>,
|
327 |
+
the first iteration model trained using 10k hours of narrated audiobooks, and <a href="https://huggingface.co/ylacombe/parler-tts-mini-jenny-30H"> Parler-TTS Jenny</a>,
|
328 |
+
a model fine-tuned on the <a href="https://huggingface.co/datasets/reach-vb/jenny_tts_dataset"> Jenny dataset</a>.</p>
|
329 |
+
|
330 |
+
<p>Both models generate high-quality speech with features that can be controlled using a simple text prompt (e.g. gender, background noise, speaking rate, pitch and reverberation).</p>
|
331 |
|
332 |
<p>Tips for ensuring good generation:
|
333 |
<ul>
|
334 |
<li>Include the term "very clear audio" to generate the highest quality audio, and "very noisy audio" for high levels of background noise</li>
|
335 |
<li>Punctuation can be used to control the prosody of the generations, e.g. use commas to add small breaks in speech</li>
|
336 |
<li>The remaining speech features (gender, speaking rate, pitch and reverberation) can be controlled directly through the prompt</li>
|
337 |
+
<li>Include the term "Jenny" when using the fine-tuned Jenny model to pick out her voice</li>
|
338 |
</ul>
|
339 |
</p>
|
340 |
"""
|