PHBJT committed on
Commit
a1cb9c1
·
verified ·
1 Parent(s): 453ef66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +53 -28
app.py CHANGED
@@ -12,7 +12,7 @@ from transformers import AutoTokenizer, AutoFeatureExtractor, set_seed
12
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
13
 
14
 
15
- repo_id = "PHBJT/french_parler_tts_mini_v0.1"
16
 
17
  model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
18
  tokenizer = AutoTokenizer.from_pretrained(repo_id)
@@ -25,26 +25,54 @@ SEED = 42
25
  default_text = "La voix humaine est un instrument de musique au-dessus de tous les autres."
26
  default_description = "A male voice speaks slowly with a very noisy background, displaying a touch of expressiveness and animation. The sound is very distant, adding an air of intrigue."
27
  examples = [
28
- [
29
- "La voix humaine est un instrument de musique au-dessus de tous les autres.",
30
- "A male voice speaks slowly with a very noisy background, displaying a touch of expressiveness and animation. The sound is very distant, adding an air of intrigue.",
31
- None,
32
- ],
33
- [
34
- "Tout ce qu'un homme est capable d'imaginer, d'autres hommes seront capables de le réaliser.",
35
- "A male voice delivers a slightly expressive and animated speech with a moderate speed. The recording features a low-pitch voice, creating a close-sounding audio experience.",
36
- None,
37
- ],
38
- [
39
- "La machine elle-même, si perfectionnée qu'on la suppose, n'est qu'un outil.",
40
- "A male voice provides a monotone yet slightly fast delivery, with a very close recording that almost has no background noise.",
41
- None,
42
- ],
43
- [
44
- "Le progrès fait naître plus de besoins qu'il n'en satisfait.",
45
- "A female voice, in a very poor recording quality, delivers slightly expressive and animated words with a fast pace. There's a high level of background noise and a very distant-sounding reverberation. The voice is slightly higher pitched than average.",
46
- None,
47
- ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  ]
49
  number_normalizer = EnglishNumberNormalizer()
50
 
@@ -134,16 +162,13 @@ with gr.Blocks(css=css) as block:
134
  """
135
  )
136
  gr.HTML(
137
- f"""
138
  <p><a href="https://github.com/huggingface/parler-tts">Parler-TTS</a> is a training and inference library for
139
  high-fidelity text-to-speech (TTS) models.</p>
140
- <p>The model demonstrated here, French Parler-TTS <a href="https://huggingface.co/PHBJT/french_parler_tts_mini_v0.1">Mini v0.1 French</a>,
141
- has been fine-tuned on a French dataset. It generates high-quality speech with features that can be controlled using a simple text prompt (e.g. gender, background noise, speaking rate, pitch and reverberation).
142
- Due to limitations on the dataset, this model might underperform for female voices (we recommend using male voices only).</p>
143
 
144
- <p>By default, Parler-TTS generates 🎲 random male voice characteristics. To ensure 🎯 <b>speaker consistency</b> across generations, try to use consistent descriptions in your prompts.</p>
145
- <p><b>Note:</b> do NOT specify the nationnality of the speaker it will cause inconsistent audio generation (do: "a male speaker", don't: "a french male speaker") </p>
146
- <p><b>Important note:</b> this model does NOT work in english, it will generate incoherent audios. But you can still use the original Parler TTS model for that. </p>
147
  """
148
  )
149
  with gr.Row():
 
12
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
13
 
14
 
15
+ repo_id = "https://huggingface.co/ylacombe/p-m-e"
16
 
17
  model = ParlerTTSForConditionalGeneration.from_pretrained(repo_id).to(device)
18
  tokenizer = AutoTokenizer.from_pretrained(repo_id)
 
25
  default_text = "La voix humaine est un instrument de musique au-dessus de tous les autres."
26
  default_description = "A male voice speaks slowly with a very noisy background, displaying a touch of expressiveness and animation. The sound is very distant, adding an air of intrigue."
27
  examples = [
28
+ # French
29
+ [
30
+ "La voix humaine est un instrument de musique au-dessus de tous les autres.",
31
+ "A male voice speaks slowly with a very noisy background, displaying a touch of expressiveness and animation. The sound is very distant, adding an air of intrigue.",
32
+ None,
33
+ ],
34
+ # Spanish
35
+ [
36
+ "La voz es el reflejo del alma en el espejo del tiempo.",
37
+ "A female voice speaks with moderate speed, showing warmth and clarity. The recording is clean with minimal background noise and has natural resonance.",
38
+ None,
39
+ ],
40
+ # Italian
41
+ [
42
+ "La voce umana è la più bella musica che esista al mondo.",
43
+ "A male voice delivers the message with passion and depth. The recording has good clarity with slight room acoustics and a medium-distance perspective.",
44
+ None,
45
+ ],
46
+ # Portuguese
47
+ [
48
+ "A voz é o espelho da alma e o som do coração.",
49
+ "A young female voice speaks with enthusiasm and energy. The recording is close-miked with crisp audio quality and subtle room ambiance.",
50
+ None,
51
+ ],
52
+ # Polish
53
+ [
54
+ "Głos ludzki jest najpiękniejszym instrumentem świata.",
55
+ "An elderly male voice speaks with wisdom and gravitas. The recording has a vintage quality with some characteristic analog warmth.",
56
+ None,
57
+ ],
58
+ # German
59
+ [
60
+ "Die menschliche Stimme ist das schönste Instrument der Welt.",
61
+ "A mature female voice speaks with authority and precision. The recording is studio-quality with perfect clarity and no background noise.",
62
+ None,
63
+ ],
64
+ # Dutch
65
+ [
66
+ "De menselijke stem is het mooiste instrument dat er bestaat.",
67
+ "A middle-aged male voice speaks with gentle inflection and warmth. The recording has natural room acoustics and balanced frequency response.",
68
+ None,
69
+ ],
70
+ # English
71
+ [
72
+ "The human voice is nature's most perfect instrument.",
73
+ "A young male voice speaks with dynamic expression and energy. The recording is professional quality with subtle environmental ambiance.",
74
+ None,
75
+ ],
76
  ]
77
  number_normalizer = EnglishNumberNormalizer()
78
 
 
162
  """
163
  )
164
  gr.HTML(
165
+ f"""
166
  <p><a href="https://github.com/huggingface/parler-tts">Parler-TTS</a> is a training and inference library for
167
  high-fidelity text-to-speech (TTS) models.</p>
168
+ <p>This multilingual model supports French, Spanish, Italian, Portuguese, Polish, German, Dutch, and English. It generates high-quality speech with features that can be controlled using a simple text prompt (e.g. gender, background noise, speaking rate, pitch and reverberation). </p>
 
 
169
 
170
+ <p>By default, Parler-TTS generates 🎲 random voice characteristics. To ensure 🎯 <b>speaker consistency</b> across generations, try to use consistent descriptions in your prompts.</p>
171
+ <p><b>Note:</b> you do not need to specify the nationality of the speaker in the description (do: "a male speaker", don't: "a french male speaker") </p>
 
172
  """
173
  )
174
  with gr.Row():