Spaces:

Musawir19
/

Taxt_to_speach

Sleeping

Musawir19 committed on Nov 15, 2024

Commit

b41b0e5

verified ·

1 Parent(s): a972636

Create app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,12 +20,16 @@ def text_to_speech(text):
     mel_output, mel_length, alignment = tacotron2.encode_text(text)
     # Decode mel spectrogram to waveform
-    # FIX: hifi_gan.decode_batch() returns only 1 value
     waveforms = hifi_gan.decode_batch(mel_output)
     # Save waveform as audio file
     audio_path = "output.wav"
-    write(audio_path, 22050, waveforms.squeeze(1).cpu().numpy())
     return audio_path
 # Streamlit UI

     mel_output, mel_length, alignment = tacotron2.encode_text(text)
     # Decode mel spectrogram to waveform
     waveforms = hifi_gan.decode_batch(mel_output)
+    # Convert waveform to numpy and normalize to int16 range
+    waveform = waveforms.squeeze(1).cpu().numpy()
+    waveform = waveform / max(abs(waveform))  # Normalize to range [-1, 1]
+    waveform = (waveform * 32767).astype("int16")  # Scale to int16 range
     # Save waveform as audio file
     audio_path = "output.wav"
+    write(audio_path, 22050, waveform)
     return audio_path
 # Streamlit UI