TekamBrice committed on
Commit
c12a8e0
·
verified ·
1 Parent(s): 5af7631

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -12
app.py CHANGED
@@ -1,16 +1,44 @@
1
- import transformers
2
- from transformers import pipeline
3
- te_speech=pipeline(model="suno/bark")
 
 
4
  import gradio as gr
5
- def text_to_speech(message):
6
- texte = te_speech(message)
7
 
8
- return texte['audio']
 
 
 
 
 
9
 
10
- demo_textspeech = gr.Interface(text_to_speech, inputs = 'text',
11
- outputs = gr.Audio(label="Speech Output"),
12
- title = 'text to Audio Application',
13
- description = 'A simple application to convert text in audio speech.',
14
- )
15
 
16
- demo_textspeech.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
2
+ from datasets import load_dataset
3
+ import torch
4
+ import soundfile as sf
5
+ import numpy as np
6
  import gradio as gr
7
+ import io
 
8
 
9
+ # Charger les modèles et les embeddings du locuteur une seule fois pour éviter de les recharger à chaque appel
10
+ processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
11
+ model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
12
+ vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
13
+ embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
14
+ speaker_embeddings = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)
15
 
 
 
 
 
 
16
 
17
+ def text_to_speech(text):
18
+ # Prétraiter le texte
19
+ inputs = processor(text=text, return_tensors="pt")
20
+
21
+ # Générer la parole
22
+ speech = model.generate_speech(
23
+ inputs["input_ids"], speaker_embeddings, vocoder=vocoder
24
+ )
25
+
26
+ # Enregistrer l'audio dans un buffer
27
+ buffer = io.BytesIO()
28
+ sf.write(buffer, speech.numpy(), samplerate=16000, format="WAV")
29
+
30
+ return buffer.getvalue()
31
+
32
+
33
+ # Créer l'interface Gradio
34
+ interface = gr.Interface(
35
+ fn=text_to_speech,
36
+ inputs="text",
37
+ outputs=gr.Audio(label="Processed Audio"),
38
+ title="Application du type Text to speech",
39
+ description="Entrez un texte en anglais et l'application va la traduire en audio"
40
+ )
41
+
42
+ # Lancer l'interface Gradio
43
+ if __name__ == "__main__":
44
+ interface.launch()