update app

app.py CHANGED
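This commit removes the demo's microphone-input path: the mic_file_path and use_mic parameters of predict, the mic_gr and use_mic_gr components, and the related validation branch, leaving the uploaded reference audio as the only speaker source. It also reorders the language check to test English first (everything else falls through to the Chinese, default-style-only branch), adds a QnA link to the demo description, and points the examples and the default reference audio at examples/speaker0.mp3, speaker1.mp3, and speaker2.mp3.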
@@ -18,7 +18,7 @@ def audio_to_base64(audio_file):
     base64_data = base64.b64encode(audio_data).decode("utf-8")
     return base64_data
 
-def predict(prompt, style, audio_file_pth, mic_file_path, use_mic, agree):
+def predict(prompt, style, audio_file_pth, agree):
     # initialize a empty info
     text_hint = ''
     # agree with the terms
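For context, the audio_to_base64 helper whose tail appears in this hunk just base64-encodes a file's bytes. A minimal sketch of the whole function, assuming the unshown lines before line 18 simply read the file from disk:

import base64

def audio_to_base64(audio_file):
    # Read the raw bytes of the audio file (assumed: a filesystem path)
    with open(audio_file, "rb") as f:
        audio_data = f.read()
    # Encode to base64 and return as a UTF-8 string, e.g. for embedding in HTML
    base64_data = base64.b64encode(audio_data).decode("utf-8")
    return base64_data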
@@ -48,18 +48,7 @@ def predict(prompt, style, audio_file_pth, mic_file_path, use_mic, agree):
             None,
         )
 
-    if language_predicted == "zh":
-        if style not in ['default']:
-            text_hint += f"[ERROR] The style {style} is not supported for Chinese, which should be in ['default']\n"
-            gr.Warning(f"The style {style} is not supported for Chinese, which should be in ['default']")
-            return (
-                text_hint,
-                None,
-                None,
-            )
-        style = 'cn_' + style
-
-    else:
+    if language_predicted == "en":
         if style not in ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']:
             text_hint += f"[ERROR] The style {style} is not supported for English, which should be in ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']\n"
             gr.Warning(f"The style {style} is not supported for English, which should be in ['default', 'whispering', 'shouting', 'excited', 'cheerful', 'terrified', 'angry', 'sad', 'friendly']")
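language_predicted itself is set earlier in predict, outside this diff; in the upstream OpenVoice demo it comes from the langid package, which is the assumption behind matching on short codes like "en". A sketch of that detection step:

import langid  # pip install langid

prompt = "今天天气真好,我们一起出去吃饭吧。"
# langid.classify returns a (language_code, score) tuple, e.g. ('zh', -231.6)
language_predicted = langid.classify(prompt)[0].strip()
print(language_predicted)  # 'zh' -> falls into the Chinese, default-style-only branch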
@@ -70,23 +59,18 @@ def predict(prompt, style, audio_file_pth, mic_file_path, use_mic, agree):
             )
         style = 'en_' + style
 
-
-    if use_mic == True:
-        if mic_file_path is not None:
-            speaker_wav = mic_file_path
-        else:
-            text_hint += f"[ERROR] Please record your voice with Microphone, or uncheck Use Microphone to use reference audios\n"
-            gr.Warning(
-                "Please record your voice with Microphone, or uncheck Use Microphone to use reference audios"
-            )
+    else:
+        if style not in ['default']:
+            text_hint += f"[ERROR] The style {style} is not supported for Chinese, which should be in ['default']\n"
+            gr.Warning(f"The style {style} is not supported for Chinese, which should be in ['default']")
             return (
                 text_hint,
                 None,
                 None,
             )
+        style = 'cn_' + style
 
-    else:
-        speaker_wav = audio_file_pth
+    speaker_wav = audio_file_pth
 
     if len(prompt) < 2:
         text_hint += f"[ERROR] Please give a longer prompt text \n"
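Every validation failure above returns the same three-slot tuple because predict feeds three Gradio outputs (info text, synthesized audio, reference audio). A toy version of that early-return pattern, with hypothetical names and messages:

def predict_stub(prompt, style, audio_file_pth, agree):
    # Hypothetical, condensed version of the checks above.
    text_hint = ''
    if not agree:
        text_hint += "[ERROR] Please check the agreement box first\n"
        return (text_hint, None, None)   # one slot per Gradio output
    if len(prompt) < 2:
        text_hint += "[ERROR] Please give a longer prompt text\n"
        return (text_hint, None, None)
    # Success path: hint text, synthesized wav, and the reference actually used.
    return ("OK\n", "generated.wav", audio_file_pth)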
@@ -186,7 +170,7 @@ markdown_table_v2 = """
 """
 content = """
 <div>
-  <strong>For multi-lingual & cross-lingual examples, please refer to <a href='https://github.com/myshell-ai/OpenVoice/blob/main/demo_part2.ipynb'>this jupyter notebook</a>.</strong>
+  <strong>If the generated voice does not sound like the reference voice, please refer to <a href='https://github.com/myshell-ai/OpenVoice/blob/main/QA.md'>this QnA</a>.</strong> <strong>For multi-lingual & cross-lingual examples, please refer to <a href='https://github.com/myshell-ai/OpenVoice/blob/main/demo_part2.ipynb'>this jupyter notebook</a>.</strong>
   This online demo mainly supports <strong>English</strong>. The <em>default</em> style also supports <strong>Chinese</strong>. But OpenVoice can adapt to any other language as long as a base speaker is provided.
 </div>
 """
@@ -197,24 +181,18 @@ examples = [
     [
         "今天天气真好,我们一起出去吃饭吧。",
         'default',
-        "examples/
-        None,
-        False,
+        "examples/speaker1.mp3",
         True,
     ],[
         "This audio is generated by open voice with a half-performance model.",
         'whispering',
-        "examples/
-        None,
-        False,
+        "examples/speaker2.mp3",
         True,
     ],
     [
         "He hoped there would be stew for dinner, turnips and carrots and bruised potatoes and fat mutton pieces to be ladled out in thick, peppered, flour-fattened sauce.",
         'sad',
-        "examples/
-        None,
-        False,
+        "examples/speaker0.mp3",
         True,
     ],
 ]
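One constraint worth keeping in mind here: each example row supplies values positionally for the components listed in gr.Examples(inputs=...) further down, so trimming the None/False microphone entries has to stay in lockstep with the new four-argument predict. Illustratively:

# A row maps onto (prompt, style, audio_file_pth, agree), matching
# inputs=[input_text_gr, style_gr, ref_gr, tos_gr] below.
row = ["This audio is generated by open voice with a half-performance model.",
       'whispering', "examples/speaker2.mp3", True]
prompt, style, audio_file_pth, agree = row
print(style, audio_file_pth)  # whispering examples/speaker2.mp3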
@@ -257,18 +235,7 @@ with gr.Blocks(analytics_enabled=False) as demo:
                 label="Reference Audio",
                 info="Click on the ✎ button to upload your own target speaker audio",
                 type="filepath",
-                value="examples/
-            )
-            mic_gr = gr.Audio(
-                source="microphone",
-                type="filepath",
-                info="Use your microphone to record audio",
-                label="Use Microphone for Reference",
-            )
-            use_mic_gr = gr.Checkbox(
-                label="Use Microphone",
-                value=False,
-                info="Notice: Microphone input may not work properly under traffic",
+                value="examples/speaker2.mp3",
             )
             tos_gr = gr.Checkbox(
                 label="Agree",
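A side note on the deleted block: source="microphone" is the Gradio 3.x spelling, and Gradio 4.x renamed the parameter, so a later restore of microphone input would look roughly like this (hypothetical, Gradio 4.x API):

import gradio as gr

# Gradio 4.x replaces source="microphone" with a sources list.
mic_gr = gr.Audio(sources=["microphone"], type="filepath",
                  label="Use Microphone for Reference")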
@@ -286,11 +253,11 @@ with gr.Blocks(analytics_enabled=False) as demo:
 
     gr.Examples(examples,
                 label="Examples",
-                inputs=[input_text_gr, style_gr, ref_gr, mic_gr, use_mic_gr, tos_gr],
+                inputs=[input_text_gr, style_gr, ref_gr, tos_gr],
                 outputs=[out_text_gr, audio_gr, ref_audio_gr],
                 fn=predict,
                 cache_examples=False,)
-    tts_button.click(predict, [input_text_gr, style_gr, ref_gr, mic_gr, use_mic_gr, tos_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
+    tts_button.click(predict, [input_text_gr, style_gr, ref_gr, tos_gr], outputs=[out_text_gr, audio_gr, ref_audio_gr])
 
 demo.queue()
 demo.launch(debug=True, show_api=True)
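Pulling the pieces together, the wiring above is the standard Gradio Blocks pattern: one list of input components, one list of output components, and a single function mapped over both by gr.Examples and Button.click. A minimal self-contained sketch of the same shape (placeholder logic, not OpenVoice's synthesis):

import gradio as gr

def predict(prompt, style, audio_file_pth, agree):
    # Placeholder: echo the request instead of synthesizing speech.
    if not agree:
        return ("[ERROR] Please accept the terms.", None, None)
    return (f"Would synthesize {prompt!r} in style {style!r}", audio_file_pth, audio_file_pth)

with gr.Blocks(analytics_enabled=False) as demo:
    input_text_gr = gr.Textbox(label="Text Prompt")
    style_gr = gr.Dropdown(choices=["default", "whispering", "sad"], value="default", label="Style")
    ref_gr = gr.Audio(type="filepath", label="Reference Audio")
    tos_gr = gr.Checkbox(label="Agree", value=False)
    tts_button = gr.Button("Send")
    out_text_gr = gr.Text(label="Info")
    audio_gr = gr.Audio(label="Synthesised Audio")
    ref_audio_gr = gr.Audio(label="Reference Audio Used")
    tts_button.click(predict, [input_text_gr, style_gr, ref_gr, tos_gr],
                     outputs=[out_text_gr, audio_gr, ref_audio_gr])

demo.queue()
demo.launch()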