sanchit-gandhi committed on
Commit
1700872
·
1 Parent(s): 6f5cea7

description

Browse files
Files changed (1) hide show
  1. app.py +9 -6
app.py CHANGED
@@ -57,8 +57,8 @@ examples = [
57
 
58
  jenny_examples = [
59
  [
60
- "Remember - this is only the first iteration of the model! To improve the prosody and naturalness of the speech further, we're scaling up the amount of training data by a factor of five times.",
61
- "Jenny speaks at a fast pace in a small, confined space with a very clear audio and an animated tone.",
62
  2.5,
63
  ],
64
  [
@@ -73,7 +73,7 @@ jenny_examples = [
73
  ],
74
  [
75
  "Montrose also, after having experienced still more variety of good and bad fortune, threw down his arms, and retired out of the kingdom.",
76
- "Jenny delivers words at a fast pace and an animated tone, in a very spacious environment, accompanied by noticeable background noise.",
77
  2.5,
78
  ],
79
  ]
@@ -323,15 +323,18 @@ with gr.Blocks(css=css) as block:
323
  gr.HTML(
324
  f"""
325
  <p><a href="https://github.com/huggingface/parler-tts"> Parler-TTS</a> is a training and inference library for
326
- high-fidelity text-to-speech (TTS) models. The model demonstrated here, <a href="https://huggingface.co/parler-tts/parler_tts_mini_v0.1"> Parler-TTS Mini v0.1</a>,
327
- is the first iteration model trained using 10k hours of narrated audiobooks. It generates high-quality speech
328
- with features that can be controlled using a simple text prompt (e.g. gender, background noise, speaking rate, pitch and reverberation).</p>
 
 
329
 
330
  <p>Tips for ensuring good generation:
331
  <ul>
332
  <li>Include the term "very clear audio" to generate the highest quality audio, and "very noisy audio" for high levels of background noise</li>
333
  <li>Punctuation can be used to control the prosody of the generations, e.g. use commas to add small breaks in speech</li>
334
  <li>The remaining speech features (gender, speaking rate, pitch and reverberation) can be controlled directly through the prompt</li>
 
335
  </ul>
336
  </p>
337
  """
 
57
 
58
  jenny_examples = [
59
  [
60
+ "Remember, this is only the first iteration of the model! To improve the prosody and naturalness of the speech further, we're scaling up the amount of training data by a factor of five times.",
61
+ "Jenny speaks at an average pace with a slightly animated delivery in a very confined sounding environment with clear audio quality.",
62
  2.5,
63
  ],
64
  [
 
73
  ],
74
  [
75
  "Montrose also, after having experienced still more variety of good and bad fortune, threw down his arms, and retired out of the kingdom.",
76
+ "Jenny delivers her words at a fast pace and an animated tone, in a very spacious environment, accompanied by noticeable background noise.",
77
  2.5,
78
  ],
79
  ]
 
323
  gr.HTML(
324
  f"""
325
  <p><a href="https://github.com/huggingface/parler-tts"> Parler-TTS</a> is a training and inference library for
326
+ high-fidelity text-to-speech (TTS) models. Two models are demonstrated here: <a href="https://huggingface.co/parler-tts/parler_tts_mini_v0.1"> Parler-TTS Mini v0.1</a>,
327
+ the first iteration of the model, trained using 10k hours of narrated audiobooks, and <a href="https://huggingface.co/ylacombe/parler-tts-mini-jenny-30H"> Parler-TTS Jenny</a>,
328
+ a model fine-tuned on the <a href="https://huggingface.co/datasets/reach-vb/jenny_tts_dataset"> Jenny dataset</a>.</p>
329
+
330
+ <p>Both models generate high-quality speech with features that can be controlled using a simple text prompt (e.g. gender, background noise, speaking rate, pitch and reverberation).</p>
331
 
332
  <p>Tips for ensuring good generation:
333
  <ul>
334
  <li>Include the term "very clear audio" to generate the highest quality audio, and "very noisy audio" for high levels of background noise</li>
335
  <li>Punctuation can be used to control the prosody of the generations, e.g. use commas to add small breaks in speech</li>
336
  <li>The remaining speech features (gender, speaking rate, pitch and reverberation) can be controlled directly through the prompt</li>
337
+ <li>Include the term "Jenny" when using the fine-tuned Jenny model to pick out her voice</li>
338
  </ul>
339
  </p>
340
  """