Spaces:

awkondoro
/

swahili_tts

Build error

awkondoro committed on Sep 24

Commit

3f312c5

•

1 Parent(s): ed40040

initial commit

Files changed (2) hide show

app.py ADDED Viewed

+import gradio as gr
+import tensorflow as tf
+from tensorflow_tts.inference import TFAutoModel, AutoProcessor
+import soundfile as sf
+# Load the model and processor
+lightspeech = TFAutoModel.from_pretrained("bookbot/lightspeech-mfa-sw-v4")
+processor = AutoProcessor.from_pretrained("bookbot/lightspeech-mfa-sw-v4")
+mb_melgan = TFAutoModel.from_pretrained("bookbot/mb-melgan-hifi-postnets-sw-v4")
+def tts(text, speaker_name="sw-TZ-Victoria"):
+    # Process input text
+    input_ids = processor.text_to_sequence(text)
+    # Generate mel-spectrogram
+    mel, _, _ = lightspeech.inference(
+        input_ids=tf.expand_dims(tf.convert_to_tensor(input_ids, dtype=tf.int32), 0),
+        speaker_ids=tf.convert_to_tensor(
+            [processor.speakers_map[speaker_name]], dtype=tf.int32
+        ),
+        speed_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
+        f0_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
+        energy_ratios=tf.convert_to_tensor([1.0], dtype=tf.float32),
+    )
+    # Generate audio from mel-spectrogram
+    audio = mb_melgan.inference(mel)[0, :, 0]
+    # Save to file
+    sf.write("output.wav", audio, 44100, "PCM_16")
+    # Return the audio file for Gradio to play
+    return "output.wav"
+# Create a Gradio interface
+iface = gr.Interface(fn=tts, inputs="text", outputs="audio")
+# Launch the interface
+iface.launch()

requirements.txt ADDED Viewed

+tensorflow
+tensorflow-tts
+soundfile
+gradio