jpc committed
Commit 40c0874
1 Parent(s): a75599a

Precompute examples, minor layout adjustments

Files changed (1):
  1. app.py (+13 -10)
app.py CHANGED
@@ -8,6 +8,8 @@ import torchaudio
 from pathlib import Path
 from whisperspeech.pipeline import Pipeline
 
+DEVEL=os.environ.get('DEVEL', False)
+
 title = """# 🙋🏻‍♂️ Welcome to Collabora's WhisperSpeech
 
 WhisperSpeech is an Open Source text-to-speech system built by Collabora and LAION by inverting Whisper.
@@ -77,7 +79,7 @@ def generate_audio(pipe, segments, speaker, speaker_url, cps=14):
     audio = pipe.vocoder.decode(atoks)
     return audio.cpu()
 
-def whisper_speech_demo(multilingual_text, speaker_audio, speaker_url, cps):
+def whisper_speech_demo(multilingual_text, speaker_audio=None, speaker_url="", cps=14):
     if len(multilingual_text) == 0:
         raise gr.Error("Please enter some text for me to speak!")
 
@@ -92,6 +94,9 @@ def whisper_speech_demo(multilingual_text, speaker_audio, speaker_url, cps):
     # torchaudio.save(mp3, audio, 24000, format='mp3')
     # return mp3.getvalue()
 
+pipe = Pipeline(torch_compile=not DEVEL)
+# warmup will come from regenerating the examples
+
 with gr.Blocks() as demo:
     gr.Markdown(title)
     with gr.Row(equal_height=True):
@@ -101,29 +106,27 @@ with gr.Blocks() as demo:
                                     info="You can use `<en>` for English and `<pl>` for Polish, see examples below.")
             cps = gr.Slider(value=14, minimum=10, maximum=15, step=.25,
                             label="Tempo (in characters per second)")
-            speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)🌬️💬",
+            with gr.Row(equal_height=True):
+                speaker_input = gr.Audio(label="Upload or Record Speaker Audio (optional)🌬️💬",
                                      sources=["upload", "microphone"],
                                      type='filepath')
+                url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
             gr.Markdown(" \n ") # fixes the bottom overflow from Audio
-            url_input = gr.Textbox(label="alternatively, you can paste in an audio file URL:")
             generate_button = gr.Button("Try Collabora's WhisperSpeech🌟")
         with gr.Column(scale=1):
             output_audio = gr.Audio(label="WhisperSpeech says…")
 
-    with gr.Row():
+    with gr.Column():
+        gr.Markdown("### Try these examples to get started !🌟🌬️")
         gr.Examples(
             examples=text_examples,
             inputs=[text_input, url_input],
             outputs=[output_audio],
             fn=whisper_speech_demo,
-            cache_examples=False,
-            label="Try these to get started !🌟🌬️"
+            cache_examples=not DEVEL,
         )
 
     generate_button.click(whisper_speech_demo, inputs=[text_input, speaker_input, url_input, cps], outputs=output_audio)
     gr.Markdown(footer)
 
-pipe = Pipeline(torch_compile=True)
-pipe.generate("WhisperSpeech warmup")
-
-demo.launch()
+demo.launch(server_port=3000 if DEVEL else None)
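Taken together, the new DEVEL environment variable switches the Space between development and production behaviour in three places: `torch_compile` for the pipeline, `cache_examples` for the examples, and the server port. Because `cache_examples=not DEVEL` makes Gradio run `whisper_speech_demo` on each example at startup and store the results, the old explicit `pipe.generate("WhisperSpeech warmup")` call becomes redundant, as the commit's own comment notes: regenerating the examples warms the model up as a side effect. A minimal standalone sketch of the same pattern, with a made-up `shout` function standing in for the real demo (only the DEVEL handling mirrors app.py):

import os
import gradio as gr

# Same toggle as in app.py: os.environ.get returns a *string*, so any
# non-empty value (even DEVEL=0) enables development mode.
DEVEL = os.environ.get('DEVEL', False)

def shout(text):
    # Hypothetical stand-in for whisper_speech_demo.
    return text.upper()

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    out = gr.Textbox(label="Output")
    gr.Examples(
        examples=[["hello"], ["world"]],
        inputs=[inp],
        outputs=[out],
        fn=shout,
        # In production, Gradio precomputes fn on every example at startup
        # and replays the stored outputs when an example is clicked;
        # skipping this in development makes restarts faster.
        cache_examples=not DEVEL,
    )

demo.launch(server_port=3000 if DEVEL else None)

With this in place, `DEVEL=1 python app.py` serves an uncompiled pipeline on port 3000 without precomputing examples, while the deployed Space keeps the compiled, precomputed behaviour.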