Spaces:
Sleeping
Sleeping
Create app.py
Browse files
app.py
CHANGED
@@ -20,12 +20,16 @@ def text_to_speech(text):
|
|
20 |
mel_output, mel_length, alignment = tacotron2.encode_text(text)
|
21 |
|
22 |
# Decode mel spectrogram to waveform
|
23 |
-
# FIX: hifi_gan.decode_batch() returns only 1 value
|
24 |
waveforms = hifi_gan.decode_batch(mel_output)
|
25 |
|
|
|
|
|
|
|
|
|
|
|
26 |
# Save waveform as audio file
|
27 |
audio_path = "output.wav"
|
28 |
-
write(audio_path, 22050,
|
29 |
return audio_path
|
30 |
|
31 |
# Streamlit UI
|
|
|
20 |
mel_output, mel_length, alignment = tacotron2.encode_text(text)
|
21 |
|
22 |
# Decode mel spectrogram to waveform
|
|
|
23 |
waveforms = hifi_gan.decode_batch(mel_output)
|
24 |
|
25 |
+
# Convert waveform to numpy and normalize to int16 range
|
26 |
+
waveform = waveforms.squeeze(1).cpu().numpy()
|
27 |
+
waveform = waveform / max(abs(waveform)) # Normalize to range [-1, 1]
|
28 |
+
waveform = (waveform * 32767).astype("int16") # Scale to int16 range
|
29 |
+
|
30 |
# Save waveform as audio file
|
31 |
audio_path = "output.wav"
|
32 |
+
write(audio_path, 22050, waveform)
|
33 |
return audio_path
|
34 |
|
35 |
# Streamlit UI
|