Shabbir-Anjum committed on
Commit
f84c58e
·
verified ·
1 Parent(s): a6de52c

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +52 -0
main.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline, SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
3
+ from datasets import load_dataset
4
+ import torch
5
+ import soundfile as sf
6
+ import os
7
+
8
+ # Function to generate speech using the pipeline method
9
@st.cache_resource
def _load_tts_pipeline():
    """Build the SpeechT5 text-to-speech pipeline once and reuse it.

    Streamlit re-executes the whole script on every interaction; without
    caching, the model would be re-instantiated on each button click.
    """
    return pipeline("text-to-speech", "microsoft/speecht5_tts")


def generate_speech_pipeline(text, speaker_embedding):
    """Synthesise speech for ``text`` with the high-level pipeline API.

    Args:
        text: The text to convert to speech.
        speaker_embedding: Speaker x-vector forwarded to the model as
            ``speaker_embeddings``.

    Returns:
        A ``(audio, sampling_rate)`` tuple taken from the pipeline output's
        ``"audio"`` and ``"sampling_rate"`` entries.
    """
    synthesiser = _load_tts_pipeline()
    speech = synthesiser(text, forward_params={"speaker_embeddings": speaker_embedding})
    return speech["audio"], speech["sampling_rate"]
13
+
14
+ # Function to generate speech using the processor + generate method
15
@st.cache_resource
def _load_speecht5_components():
    """Load the SpeechT5 processor, acoustic model and HiFi-GAN vocoder once.

    Cached so that Streamlit reruns reuse the same objects instead of
    re-instantiating three pretrained models per request.
    """
    processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
    model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
    vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
    return processor, model, vocoder


def generate_speech_processor(text, speaker_embedding):
    """Synthesise speech for ``text`` with the processor + ``generate_speech`` API.

    Args:
        text: The text to convert to speech.
        speaker_embedding: Speaker x-vector tensor passed to
            ``model.generate_speech``.

    Returns:
        A ``(audio, sampling_rate)`` tuple where ``audio`` is the generated
        waveform converted to a NumPy array.
    """
    processor, model, vocoder = _load_speecht5_components()

    inputs = processor(text=text, return_tensors="pt")
    speech = model.generate_speech(inputs["input_ids"], speaker_embedding, vocoder=vocoder)

    # Prefer the rate the vocoder was configured with; fall back to the
    # previously hard-coded 16 kHz if the config does not expose it.
    sampling_rate = getattr(vocoder.config, "sampling_rate", 16000)
    return speech.numpy(), sampling_rate
23
+
24
def main():
    """Render the text-to-speech page and synthesise audio on request.

    Shows a text area and a method selector; when "Generate Speech" is
    clicked, loads a speaker x-vector, runs the chosen synthesis function,
    writes the result to ``speech.wav`` and plays it back in the page.
    """
    st.title("Text-to-Speech with SpeechT5")

    st.write("Enter the text you want to convert to speech:")

    text = st.text_area("Text", "Hello, my dog is cooler than you!")

    # The selector has to be created on every script run, *before* the button.
    # st.button is True only for the single rerun triggered by the click, so a
    # widget created inside that branch always starts at its default and
    # vanishes (button back to False) as soon as the user changes it.
    method = st.selectbox("Choose the method for generating speech", ["Pipeline", "Processor + Generate"])

    if not st.button("Generate Speech"):
        return

    # Nothing to synthesise for blank input; tell the user instead of running
    # the model on an empty string.
    if not text.strip():
        st.warning("Please enter some text to convert to speech.")
        return

    st.write("Generating speech...")

    # Load xvector containing speaker's voice characteristics from a dataset
    embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
    # unsqueeze(0) adds a leading dimension of size 1 to the x-vector.
    speaker_embedding = torch.tensor(embeddings_dataset[7306]["xvector"]).unsqueeze(0)

    if method == "Pipeline":
        audio, samplerate = generate_speech_pipeline(text, speaker_embedding)
    else:
        audio, samplerate = generate_speech_processor(text, speaker_embedding)

    # Save and play the generated speech
    output_path = "speech.wav"
    sf.write(output_path, audio, samplerate=samplerate)
    st.audio(output_path)
50
+
51
# Entry point: build the page only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()