update app

app.py CHANGED
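This commit removes the demo's microphone-input path: the mic_file_path and use_mic parameters of predict, the mic_gr and use_mic_gr components, and the related validation branch, leaving the uploaded reference audio as the only speaker source. It also reorders the language check to test English first (everything else falls through to the Chinese, default-style-only branch), adds a QnA link to the demo description, and points the examples and the default reference audio at examples/speaker0.mp3, speaker1.mp3, and speaker2.mp3.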
@@ -18,7 +18,7 @@ def audio_to_base64(audio_file):
     base64_data = base64.b64encode(audio_data).decode("utf-8")
     return base64_data
 
-def predict(prompt, style, audio_file_pth, mic_file_path, use_mic, agree):
+def predict(prompt, style, audio_file_pth, agree):
     # initialize a empty info
     text_hint = ''
     # agree with the terms
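For context, the audio_to_base64 helper whose tail appears in this hunk just base64-encodes a file's bytes. A minimal sketch of the whole function, assuming the unshown lines before line 18 simply read the file from disk:

import base64

def audio_to_base64(audio_file):
    # Read the raw bytes of the audio file (assumed: a filesystem path)
    with open(audio_file, "rb") as f:
        audio_data = f.read()
    # Encode to base64 and return as a UTF-8 string, e.g. for embedding in HTML
    base64_data = base64.b64encode(audio_data).decode("utf-8")
    return base64_data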
@@ -48,18 +48,7 @@ def predict(prompt, style, audio_file_pth, mic_file_path, use_mic, agree):
             None,
         )
 
-    if language_predicted == "zh":
-        if style not in ['default']:
-            text_hint += f"[ERROR] The style {style} is not supported for Chinese, which should be in ['default']\n"
-            gr.Warning(f"The style {style} is not supported for Chinese, which should be in ['default']")
-            return (
-                text_hint,
-                None,
-                None,
-            )
-        style = 'cn_' + style
-
-    else:
+    if language_predicted == "en":
         if style not in ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']:
             text_hint += f"[ERROR] The style {style} is not supported for English, which should be in ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']\n"
             gr.Warning(f"The style {style} is not supported for English, which should be in ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']")
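language_predicted itself is set earlier in predict, outside this diff; in the upstream OpenVoice demo it comes from the langid package, which is the assumption behind matching on short codes like "en". A sketch of that detection step:

import langid  # pip install langid

prompt = "今天天气真好,我们一起出去吃饭吧。"
# langid.classify returns a (language_code, score) tuple, e.g. ('zh', -231.6)
language_predicted = langid.classify(prompt)[0].strip()
print(language_predicted)  # 'zh' -> falls into the Chinese, default-style-only branch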
@@ -70,23 +59,18 @@ def predict(prompt, style, audio_file_pth, mic_file_path, use_mic, agree):
             )
         style = 'en_' + style
 
-
-    if use_mic == True:
-        if mic_file_path is not None:
-            speaker_wav = mic_file_path
-        else:
-            text_hint += f"[ERROR] Please record your voice with Microphone, or uncheck Use Microphone to use reference audios\n"
-            gr.Warning(
-                "Please record your voice with Microphone, or uncheck Use Microphone to use reference audios"
-            )
+    else:
+        if style not in ['default']:
+            text_hint += f"[ERROR] The style {style} is not supported for Chinese, which should be in ['default']\n"
+            gr.Warning(f"The style {style} is not supported for Chinese, which should be in ['default']")
             return (
                 text_hint,
                 None,
                 None,
             )
+        style = 'cn_' + style
 
-    else:
-        speaker_wav = audio_file_pth
+    speaker_wav = audio_file_pth
 
     if len(prompt) < 2:
         text_hint += f"[ERROR] Please give a longer prompt text \n"
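Every validation failure above returns the same three-slot tuple because predict feeds three Gradio outputs (info text, synthesized audio, reference audio). A toy version of that early-return pattern, with hypothetical names and messages:

def predict_stub(prompt, style, audio_file_pth, agree):
    # Hypothetical, condensed version of the checks above.
    text_hint = ''
    if not agree:
        text_hint += "[ERROR] Please check the agreement box first\n"
        return (text_hint, None, None)   # one slot per Gradio output
    if len(prompt) < 2:
        text_hint += "[ERROR] Please give a longer prompt text\n"
        return (text_hint, None, None)
    # Success path: hint text, synthesized wav, and the reference actually used.
    return ("OK\n", "generated.wav", audio_file_pth)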
@@ -186,7 +170,7 @@ markdown_table_v2 = """
 """
 content = """
 <div>
-  <strong>For multi-lingual & cross-lingual examples, please refer to <a href='https://github.com/myshell-ai/OpenVoice/blob/main/demo_part2.ipynb'>this jupyter notebook</a>.</strong>
+  <strong>If the generated voice does not sound like the reference voice, please refer to <a href='https://github.com/myshell-ai/OpenVoice/blob/main/QA.md'>this QnA</a>.</strong> <strong>For multi-lingual & cross-lingual examples, please refer to <a href='https://github.com/myshell-ai/OpenVoice/blob/main/demo_part2.ipynb'>this jupyter notebook</a>.</strong>
   This online demo mainly supports <strong>English</strong>. The <em>default</em> style also supports <strong>Chinese</strong>. But OpenVoice can adapt to any other language as long as a base speaker is provided.
 </div>
 """
@@ -197,24 +181,18 @@ examples = [
     [
         "今天天气真好,我们一起出去吃饭吧。",
         'default',
-        "examples/
-        None,
-        False,
+        "examples/speaker1.mp3",
         True,
     ],[
         "This audio is generated by open voice with a half-performance model.",
         'whispering',
-        "examples/
-        None,
-        False,
+        "examples/speaker2.mp3",
         True,
     ],
     [
         "He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
         'sad',
-        "examples/
-        None,
-        False,
+        "examples/speaker0.mp3",
         True,
     ],
 ]
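One constraint worth keeping in mind here: each example row supplies values positionally for the components listed in gr.Examples(inputs=...) further down, so trimming the None/False microphone entries has to stay in lockstep with the new four-argument predict. Illustratively:

# A row maps onto (prompt, style, audio_file_pth, agree), matching
# inputs=[input_text_gr, style_gr, ref_gr, tos_gr] below.
row = ["This audio is generated by open voice with a half-performance model.",
       'whispering', "examples/speaker2.mp3", True]
prompt, style, audio_file_pth, agree = row
print(style, audio_file_pth)  # whispering examples/speaker2.mp3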
@@ -257,18 +235,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
                 label="Reference Audio",
                 info="Click on the ✎ button to upload your own target speaker audio",
                 type="filepath",
-                value="examples/
-            )
-            mic_gr = gr.Audio(
-                source="microphone",
-                type="filepath",
-                info="Use your microphone to record audio",
-                label="Use Microphone for Reference",
-            )
-            use_mic_gr = gr.Checkbox(
-                label="Use Microphone",
-                value=False,
-                info="Notice: Microphone input may not work properly under traffic",
+                value="examples/speaker2.mp3",
             )
             tos_gr = gr.Checkbox(
                 label="Agree",
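A side note on the deleted block: source="microphone" is the Gradio 3.x spelling, and Gradio 4.x renamed the parameter, so a later restore of microphone input would look roughly like this (hypothetical, Gradio 4.x API):

import gradio as gr

# Gradio 4.x replaces source="microphone" with a sources list.
mic_gr = gr.Audio(sources=["microphone"], type="filepath",
                  label="Use Microphone for Reference")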
@@ -286,11 +253,11 @@ with gr.Blocks(analytics_enabled=False) as demo:
 
     gr.Examples(examples,
                 label="Examples",
-                inputs=[input_text_gr, style_gr, ref_gr, mic_gr, use_mic_gr, tos_gr],
+                inputs=[input_text_gr, style_gr, ref_gr, tos_gr],
                 outputs=[out_text_gr, audio_gr, ref_audio_gr],
                 fn=predict,
                 cache_examples=False,)
-    tts_button.click(predict, [input_text_gr, style_gr, ref_gr, mic_gr, use_mic_gr, tos_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
+    tts_button.click(predict, [input_text_gr, style_gr, ref_gr, tos_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
 
 demo.queue()
 demo.launch(debug=True, show_api=True)
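Pulling the pieces together, the wiring above is the standard Gradio Blocks pattern: one list of input components, one list of output components, and a single function mapped over both by gr.Examples and Button.click. A minimal self-contained sketch of the same shape (placeholder logic, not OpenVoice's synthesis):

import gradio as gr

def predict(prompt, style, audio_file_pth, agree):
    # Placeholder: echo the request instead of synthesizing speech.
    if not agree:
        return ("[ERROR] Please accept the terms.", None, None)
    return (f"Would synthesize {prompt!r} in style {style!r}", audio_file_pth, audio_file_pth)

with gr.Blocks(analytics_enabled=False) as demo:
    input_text_gr = gr.Textbox(label="Text Prompt")
    style_gr = gr.Dropdown(choices=["default", "whispering", "sad"], value="default", label="Style")
    ref_gr = gr.Audio(type="filepath", label="Reference Audio")
    tos_gr = gr.Checkbox(label="Agree", value=False)
    tts_button = gr.Button("Send")
    out_text_gr = gr.Text(label="Info")
    audio_gr = gr.Audio(label="Synthesised Audio")
    ref_audio_gr = gr.Audio(label="Reference Audio Used")
    tts_button.click(predict, [input_text_gr, style_gr, ref_gr, tos_gr],
                     outputs=[out_text_gr, audio_gr, ref_audio_gr])

demo.queue()
demo.launch()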