Update app.py
app.py CHANGED
@@ -38,7 +38,7 @@ for v in voicelist:
 # v = voice.lower()
 # # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
 # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
-if not torch.cuda.is_available(): INTROTXT += "\n\n###
+if not torch.cuda.is_available(): INTROTXT += "\n\n### on CPU, it'll run rather slower, but not too much."
 def synthesize(text, voice, lngsteps, password, progress=gr.Progress()):
     if text.strip() == "":
         raise gr.Error("You must enter some text")
@@ -123,12 +123,14 @@ with gr.Blocks() as vctk:
         with gr.Column(scale=1):
             inp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
             voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
-            multispeakersteps = gr.Slider(minimum=3, maximum=15, value=3, step=1, label="Diffusion Steps", info="
+            multispeakersteps = gr.Slider(minimum=3, maximum=15, value=3, step=1, label="Diffusion Steps", info="Increasing this gives a more emotional result, but not necessarily a higher-quality one.", interactive=True)
+            alpha = gr.Slider(minimum=0, maximum=1, value=0.3, step=0.1, label="Alpha", info="Defaults to 0.3", interactive=True)
+            beta = gr.Slider(minimum=0, maximum=1, value=0.7, step=0.1, label="Beta", info="Defaults to 0.7", interactive=True)
             # use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
         with gr.Column(scale=1):
             btn = gr.Button("Synthesize", variant="primary")
             audio = gr.Audio(interactive=False, label="Synthesized Audio", waveform_options={'waveform_progress_color': '#3C82F6'})
-            btn.click(synthesize, inputs=[inp, voice, multispeakersteps], outputs=[audio], concurrency_limit=4)
+            btn.click(synthesize, inputs=[inp, voice, multispeakersteps, alpha, beta], outputs=[audio], concurrency_limit=4)
 with gr.Blocks() as clone:
     with gr.Row():
         with gr.Column(scale=1):
@@ -169,9 +171,9 @@ with gr.Blocks(title="StyleTTS 2", css="footer{display:none !important}", theme=
     # gr.TabbedInterface([vctk, clone, lj, longText], ['Multi-Voice', 'Voice Cloning', 'Text-guided Inference', 'Long Text [Beta]'])
     gr.TabbedInterface([vctk, clone, lj], ['Multi-Voice', 'do not use this option','Text-guided Inference', 'Long Text [Beta]'])
     gr.Markdown("""
-
+The base code was borrowed from [mrfakename](https://twitter.com/realmrfakename). Neither of us is affiliated with the StyleTTS 2 authors.

-Run
+Run the original (English-only) demo locally using Docker:

 ```bash
 docker run -it -p 7860:7860 --platform=linux/amd64 --gpus all registry.hf.space/styletts2-styletts2:latest python app.py
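
Note that the updated `btn.click` now passes five inputs (`inp`, `voice`, `multispeakersteps`, `alpha`, `beta`), while the `synthesize` signature visible as context in the first hunk still reads `synthesize(text, voice, lngsteps, password, progress=gr.Progress())`, so the commit presumably adjusts that signature elsewhere in app.py. The sketch below is only a rough illustration of how such a signature could forward the slider values to `styletts2importable.inference`, mirroring the commented-out call in the first hunk; it is not code from this diff, and the parameter names and defaults are assumptions taken from the sliders above.

```python
# Hypothetical sketch, not part of this commit: a synthesize() that accepts the
# five inputs wired up in btn.click. Assumes app.py's module-level `voices` dict
# and `styletts2importable`; slider defaults (alpha=0.3, beta=0.7) are reused.
import gradio as gr

def synthesize(text, voice, lngsteps, alpha=0.3, beta=0.7, progress=gr.Progress()):
    if text.strip() == "":
        raise gr.Error("You must enter some text")
    v = voice.lower()
    # Forward the user-chosen alpha/beta instead of the hard-coded 0.3/0.7.
    return (24000, styletts2importable.inference(
        text, voices[v],
        alpha=alpha, beta=beta,
        diffusion_steps=lngsteps, embedding_scale=1))
```

With a signature like this, the old `password` parameter would be dropped or handled separately; the actual commit may resolve that differently.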