Musawir19 committed on
Commit
b41b0e5
·
verified ·
1 Parent(s): a972636

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -20,12 +20,16 @@ def text_to_speech(text):
20
  mel_output, mel_length, alignment = tacotron2.encode_text(text)
21
 
22
  # Decode mel spectrogram to waveform
23
- # FIX: hifi_gan.decode_batch() returns only 1 value
24
  waveforms = hifi_gan.decode_batch(mel_output)
25
 
 
 
 
 
 
26
  # Save waveform as audio file
27
  audio_path = "output.wav"
28
- write(audio_path, 22050, waveforms.squeeze(1).cpu().numpy())
29
  return audio_path
30
 
31
  # Streamlit UI
 
20
  mel_output, mel_length, alignment = tacotron2.encode_text(text)
21
 
22
  # Decode mel spectrogram to waveform
 
23
  waveforms = hifi_gan.decode_batch(mel_output)
24
 
25
+ # Convert waveform to numpy and normalize to int16 range
26
+ waveform = waveforms.squeeze(1).cpu().numpy()
27
+ waveform = waveform / max(abs(waveform)) # Normalize to range [-1, 1]
28
+ waveform = (waveform * 32767).astype("int16") # Scale to int16 range
29
+
30
  # Save waveform as audio file
31
  audio_path = "output.wav"
32
+ write(audio_path, 22050, waveform)
33
  return audio_path
34
 
35
  # Streamlit UI