Tonic committed on
Commit
9488c79
1 Parent(s): e660ba9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -3
app.py CHANGED
@@ -4,6 +4,8 @@ import gradio as gr
4
  import os
5
  from whisperspeech.pipeline import Pipeline
6
  import torch
 
 
7
  import torch.nn.functional as F
8
  from whisperspeech.languages import LANGUAGES
9
  from whisperspeech.pipeline import Pipeline
@@ -32,8 +34,8 @@ def whisper_speech_demo(text, lang, speaker_audio=None, mix_lang=None, mix_text=
32
 
33
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
34
  tmp_file_name = tmp_file.name
35
- with open(tmp_file_name, 'wb') as file:
36
- file.write(audio_data)
37
 
38
  return tmp_file_name
39
 
@@ -44,7 +46,7 @@ with gr.Blocks() as demo:
44
  lang_input = gr.Dropdown(choices=list(LANGUAGES.keys()), label="Language")
45
  speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)", sources=["upload", "microphone"], type="filepath")
46
  with gr.Row():
47
- mix_lang_input = gr.Textbox(label="Mixed Languages (optional, comma-separated)", placeholder="e.g., en,pl")
48
  mix_text_input = gr.Textbox(label="Mixed Texts (optional, for mixed languages)", placeholder="e.g., Hello, Cześć")
49
  with gr.Row():
50
  submit_button = gr.Button("Generate Speech")
 
4
  import os
5
  from whisperspeech.pipeline import Pipeline
6
  import torch
7
+ import soundfile as sf
8
+ import numpy as np
9
  import torch.nn.functional as F
10
  from whisperspeech.languages import LANGUAGES
11
  from whisperspeech.pipeline import Pipeline
 
34
 
35
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
36
  tmp_file_name = tmp_file.name
37
+ audio_np = audio_data.numpy() # Convert tensor to numpy array
38
+ sf.write(tmp_file_name, audio_np, 22050) # Assuming a sample rate of 22050 Hz
39
 
40
  return tmp_file_name
41
 
 
46
  lang_input = gr.Dropdown(choices=list(LANGUAGES.keys()), label="Language")
47
  speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)", sources=["upload", "microphone"], type="filepath")
48
  with gr.Row():
49
+ mix_lang_input = gr.CheckboxGroup(choices=list(LANGUAGES.keys()), label="Mixed Languages (optional)")
50
  mix_text_input = gr.Textbox(label="Mixed Texts (optional, for mixed languages)", placeholder="e.g., Hello, Cześć")
51
  with gr.Row():
52
  submit_button = gr.Button("Generate Speech")