whisperspeech

Paused

Tonic committed on Jan 25, 2024

Commit

9d8f293

verified ·

1 Parent(s): af0a5f1

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -89,10 +89,13 @@ def whisper_speech_demo(multilingual_text, speaker_audio):
         audio_np = generate_segment_audio(text_str, lang, speaker_url, pipe)
         print("Audio segment shape:", audio_np.shape)  # Debug statement
         audio_segments.append(audio_np)
-    concatenated_audio = concatenate_audio_segments(audio_segments)
-    print("Final concatenated audio shape:", concatenated_audio.shape)  # Debug statement
     concatenated_audio = concatenated_audio / np.max(np.abs(concatenated_audio))
-    return concatenated_audio[0]
 with gr.Blocks() as demo:
     gr.Markdown(title)

         audio_np = generate_segment_audio(text_str, lang, speaker_url, pipe)
         print("Audio segment shape:", audio_np.shape)  # Debug statement
         audio_segments.append(audio_np)
+    # Normalize the concatenated audio
     concatenated_audio = concatenated_audio / np.max(np.abs(concatenated_audio))
+    # Write the audio data to a temporary file and return the file path
+    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as tmp_file:
+        sf.write(tmp_file.name, concatenated_audio.T, 24000, format='WAV', subtype='PCM_16')
+        return tmp_file.name
 with gr.Blocks() as demo:
     gr.Markdown(title)