Spaces:

Babyloncoder
/

Text-to-speech-with-pitch-controls

Sleeping

App Files Files Community

Babyloncoder committed on Mar 24, 2024

Commit

c4ff521

verified ·

1 Parent(s): 9f268aa

Create app.py

Browse files

Files changed (1) hide show

app.py +52 -0

app.py ADDED Viewed

	@@ -0,0 +1,52 @@

+import gradio as gr
+from transformers import VitsModel, AutoTokenizer
+import torch
+import scipy.io.wavfile
+import numpy as np
+import librosa
+import soundfile as sf
+import tempfile
+# Load the model and tokenizer
+model = VitsModel.from_pretrained("facebook/mms-tts-eng")
+tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
+def pitch_shift_np(audio_np, sampling_rate, pitch_shift):
+    # Correcting the function call
+    return librosa.effects.pitch_shift(audio_np, sr=sampling_rate, n_steps=pitch_shift)
+def synthesize_speech(text, pitch_shift):
+    # Tokenize the input text
+    inputs = tokenizer(text, return_tensors="pt")
+    # Generate waveform
+    with torch.no_grad():
+        output = model(**inputs).waveform.squeeze().numpy()
+    # Pitch shift
+    shifted_audio = pitch_shift_np(output, model.config.sampling_rate, pitch_shift)
+    # Save to a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as fp:
+        sf.write(fp.name, shifted_audio, model.config.sampling_rate)
+        temp_file_path = fp.name
+    return temp_file_path
+# Create the Gradio interface
+interface = gr.Interface(
+    fn=synthesize_speech,
+    inputs=[
+        gr.components.Textbox(lines=2, placeholder="Type your text here..."),
+        gr.components.Slider(minimum=-2, maximum=2, step=0.1, label="Pitch Shift (Semitones)")
+    ],
+    outputs=gr.components.Audio(type="filepath", label="Generated Speech"),
+    title="Text to Speech Synthesis",
+    description="Type text and convert it to speech using a TTS model. Use the slider to adjust the pitch."
+)
+# Launch the application
+interface.launch()