Tonic committed on
Commit
8b6e3fd
1 Parent(s): e27c13f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -19
app.py CHANGED
@@ -33,34 +33,50 @@ def whisper_speech_demo(text, lang, speaker_audio=None, mix_lang=None, mix_text=
33
  else:
34
  audio_data = pipe.generate(text, speaker_url, lang)
35
 
 
 
 
 
36
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
37
  tmp_file_name = tmp_file.name
38
- audio_np = audio_data.cpu().numpy()
39
-
40
  if audio_np.max() > 1.0 or audio_np.min() < -1.0:
41
  audio_np = audio_np / np.max(np.abs(audio_np))
42
 
43
- sf.write(tmp_file_name, audio_np, 24000)
44
 
45
  return tmp_file_name
46
 
47
  with gr.Blocks() as demo:
48
  gr.Markdown(title)
49
- with gr.Row():
50
- text_input = gr.Textbox(label="Enter text")
51
- lang_input = gr.Dropdown(choices=list(LANGUAGES.keys()), label="Language")
52
- speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)", sources=["upload", "microphone"], type="filepath")
53
- with gr.Row():
54
- mix_lang_input = gr.CheckboxGroup(choices=list(LANGUAGES.keys()), label="Mixed Languages (optional)")
55
- mix_text_input = gr.Textbox(label="Mixed Texts (optional, for mixed languages)", placeholder="e.g., Hello, Cześć")
56
- with gr.Row():
57
- submit_button = gr.Button("Generate Speech")
58
- output_audio = gr.Audio(label="🌬️💬📝WhisperSpeech")
59
-
60
- submit_button.click(
61
- whisper_speech_demo,
62
- inputs=[text_input, lang_input, speaker_input, mix_lang_input, mix_text_input],
63
- outputs=output_audio
64
- )
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
  demo.launch()
 
33
  else:
34
  audio_data = pipe.generate(text, speaker_url, lang)
35
 
36
+ resample_audio = resampler(newsr=24000)
37
+ audio_data_resampled = next(resample_audio([{'sample_rate': 22050, 'samples': audio_data.cpu()}]))['samples_24k']
38
+
39
+ # Normalize
40
  with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp_file:
41
  tmp_file_name = tmp_file.name
42
+ audio_np = audio_data_resampled.numpy() # Convert to numpy array
43
+
44
  if audio_np.max() > 1.0 or audio_np.min() < -1.0:
45
  audio_np = audio_np / np.max(np.abs(audio_np))
46
 
47
+ sf.write(tmp_file_name, audio_np, 24000, 'PCM_24') # Write with a sample rate of 24000 Hz
48
 
49
  return tmp_file_name
50
 
51
  with gr.Blocks() as demo:
52
  gr.Markdown(title)
53
+
54
+ with gr.Tabs():
55
+ with gr.TabItem("Standard TTS"):
56
+ with gr.Row():
57
+ text_input = gr.Textbox(label="Enter text")
58
+ lang_input = gr.Dropdown(choices=list(LANGUAGES.keys()), label="Language")
59
+ speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)", sources=["upload", "microphone"], type="filepath")
60
+ generate_button = gr.Button("Generate Speech")
61
+ output_audio_standard = gr.Audio(label="🌬️💬📝WhisperSpeech")
62
+
63
+ generate_button.click(
64
+ whisper_speech_demo,
65
+ inputs=[text_input, lang_input, speaker_input, None, None],
66
+ outputs=output_audio_standard
67
+ )
68
+
69
+ with gr.TabItem("Mixed Language TTS"):
70
+ with gr.Row():
71
+ mix_text_input = gr.Textbox(label="Enter mixed language text", placeholder="e.g., Hello, Cześć")
72
+ mix_lang_input = gr.CheckboxGroup(choices=list(LANGUAGES.keys()), label="Select Languages")
73
+ mix_generate_button = gr.Button("Generate Mixed Speech")
74
+ output_audio_mixed = gr.Audio(label="🌬️💬📝WhisperSpeech Mixed")
75
+
76
+ mix_generate_button.click(
77
+ whisper_speech_demo,
78
+ inputs=[mix_text_input, None, None, mix_lang_input, mix_text_input],
79
+ outputs=output_audio_mixed
80
+ )
81
 
82
  demo.launch()