awkondoro committed on
Commit
3f312c5
1 Parent(s): ed40040

initial commit

Browse files
Files changed (2) hide show
  1. app.py +41 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tensorflow as tf
3
+ from tensorflow_tts.inference import TFAutoModel, AutoProcessor
4
+ import soundfile as sf
5
+
6
# Hub repository ids, named once so the acoustic model and its processor
# are guaranteed to come from the same checkpoint.
_ACOUSTIC_REPO = "bookbot/lightspeech-mfa-sw-v4"
_VOCODER_REPO = "bookbot/mb-melgan-hifi-postnets-sw-v4"

# Acoustic model (produces the mel-spectrogram in tts()) and the processor
# that turns raw text into its input id sequence.
lightspeech = TFAutoModel.from_pretrained(_ACOUSTIC_REPO)
processor = AutoProcessor.from_pretrained(_ACOUSTIC_REPO)

# Vocoder that turns the mel-spectrogram into a waveform.
mb_melgan = TFAutoModel.from_pretrained(_VOCODER_REPO)
10
+
11
+
12
def tts(text, speaker_name="sw-TZ-Victoria"):
    """Synthesize speech for *text* and return the path to a WAV file.

    The text is converted to an id sequence by the module-level ``processor``,
    turned into a mel-spectrogram by ``lightspeech`` and finally into a
    waveform by ``mb_melgan``. The waveform is written as 16-bit PCM.

    Args:
        text: The sentence(s) to synthesize.
        speaker_name: Key into ``processor.speakers_map`` selecting the voice.

    Returns:
        Filesystem path of a freshly created ``.wav`` file. Each call gets
        its own file, so overlapping requests never overwrite one another.

    Raises:
        KeyError: If ``speaker_name`` is not present in
            ``processor.speakers_map``.
    """
    # Local imports keep this block self-contained; both are stdlib.
    import os
    import tempfile

    # Rate written into the WAV header.
    # NOTE(review): presumably the vocoder's native rate - confirm against
    # the model card before changing.
    sample_rate = 44100

    # Process input text
    input_ids = processor.text_to_sequence(text)

    # Resolve the speaker up front so a bad name yields an actionable message
    # instead of a bare KeyError raised from the middle of the inference call.
    try:
        speaker_id = processor.speakers_map[speaker_name]
    except KeyError:
        raise KeyError(
            f"Unknown speaker {speaker_name!r}; "
            f"available speakers: {sorted(processor.speakers_map)}"
        ) from None

    # Generate mel-spectrogram. The id sequence gets a leading batch axis;
    # the ratio arguments are all left at 1.0.
    mel, _, _ = lightspeech.inference(
        input_ids=tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
        speaker_ids=tf.convert_to_tensor([speaker_id], dtype=tf.int32),
        speed_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
        f0_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
        energy_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
    )

    # Generate audio from mel-spectrogram: first batch item, first channel.
    audio = mb_melgan.inference(mel)[0, :, 0]

    # Write to a unique temp file rather than a shared "output.wav": a fixed
    # name lets concurrent requests clobber each other's audio and fails when
    # the working directory is read-only. The file is intentionally left on
    # disk because the caller (Gradio) reads it after this function returns.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # soundfile reopens the file by path
    try:
        sf.write(wav_path, audio, sample_rate, "PCM_16")
    except Exception:
        # Do not leave an empty/partial file behind when encoding fails.
        os.remove(wav_path)
        raise

    # Return the audio file for Gradio to play
    return wav_path
35
+
36
+
37
# Wire tts() into a Gradio UI with a "text" input and an "audio" output.
# Only the text is supplied by the UI, so tts() uses its default speaker.
iface = gr.Interface(
    fn=tts,
    inputs="text",
    outputs="audio",
)

# Start serving the interface.
iface.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ tensorflow
2
+ tensorflow-tts
3
+ soundfile
4
+ gradio