whisperspeech

Paused

Tonic committed on Jan 20, 2024

Commit

9488c79

verified ·

1 Parent(s): e660ba9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -4,6 +4,8 @@ import gradio as gr
 import os
 from whisperspeech.pipeline import Pipeline
 import torch
 import torch.nn.functional as F
 from whisperspeech.languages import LANGUAGES
 from whisperspeech.pipeline import Pipeline
@@ -32,8 +34,8 @@ def whisper_speech_demo(text, lang, speaker_audio=None, mix_lang=None, mix_text=
     with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
         tmp_file_name = tmp_file.name
-        with open(tmp_file_name, 'wb') as file:
-            file.write(audio_data)
     return tmp_file_name
@@ -44,7 +46,7 @@ with gr.Blocks() as demo:
         lang_input = gr.Dropdown(choices=list(LANGUAGES.keys()), label="Language")
         speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)", sources=["upload", "microphone"], type="filepath")
     with gr.Row():
-        mix_lang_input = gr.Textbox(label="Mixed Languages (optional, comma-separated)", placeholder="e.g., en,pl")
         mix_text_input = gr.Textbox(label="Mixed Texts (optional, for mixed languages)", placeholder="e.g., Hello, Cześć")
     with gr.Row():
         submit_button = gr.Button("Generate Speech")

 import os
 from whisperspeech.pipeline import Pipeline
 import torch
+import soundfile as sf
+import numpy as np
 import torch.nn.functional as F
 from whisperspeech.languages import LANGUAGES
 from whisperspeech.pipeline import Pipeline
     with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
         tmp_file_name = tmp_file.name
+        audio_np = audio_data.numpy()  # Convert tensor to numpy array
+        sf.write(tmp_file_name, audio_np, 22050)  # Assuming a sample rate of 22050 Hz
     return tmp_file_name
         lang_input = gr.Dropdown(choices=list(LANGUAGES.keys()), label="Language")
         speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)", sources=["upload", "microphone"], type="filepath")
     with gr.Row():
+        mix_lang_input = gr.CheckboxGroup(choices=list(LANGUAGES.keys()), label="Mixed Languages (optional)")
         mix_text_input = gr.Textbox(label="Mixed Texts (optional, for mixed languages)", placeholder="e.g., Hello, Cześć")
     with gr.Row():
         submit_button = gr.Button("Generate Speech")