Spaces:
Running
on
A10G
Running
on
A10G
Add Voice Conversion support to gradio
Browse files
app.py
CHANGED
@@ -3,10 +3,7 @@ import os
|
|
3 |
import gradio as gr
|
4 |
from openai import OpenAI
|
5 |
|
6 |
-
from playdiffusion import PlayDiffusion, InpaintInput, TTSInput
|
7 |
-
from playdiffusion.utils.audio_utils import raw_audio_to_torch_audio
|
8 |
-
from playdiffusion.utils.save_audio import make_16bit_pcm
|
9 |
-
from playdiffusion.utils.voice_resource import VoiceResource
|
10 |
|
11 |
whisper_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
12 |
inpainter = PlayDiffusion()
|
@@ -72,6 +69,9 @@ def create_advanced_options_accordion():
|
|
72 |
use_manual_ratio, audio_token_syllable_ratio)
|
73 |
|
74 |
|
|
|
|
|
|
|
75 |
if __name__ == '__main__':
|
76 |
with gr.Blocks(analytics_enabled=False, title="PlayDiffusion") as demo:
|
77 |
gr.Markdown("## PlayDiffusion")
|
@@ -126,4 +126,21 @@ if __name__ == '__main__':
|
|
126 |
outputs=[tts_output]
|
127 |
)
|
128 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
129 |
demo.launch(share=True)
|
|
|
3 |
import gradio as gr
|
4 |
from openai import OpenAI
|
5 |
|
6 |
+
from playdiffusion import PlayDiffusion, InpaintInput, TTSInput, RVCInput
|
|
|
|
|
|
|
7 |
|
8 |
whisper_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
|
9 |
inpainter = PlayDiffusion()
|
|
|
69 |
use_manual_ratio, audio_token_syllable_ratio)
|
70 |
|
71 |
|
72 |
+
def speech_rvc(rvc_source_speech, rvc_target_voice):
    """Run voice conversion on a source recording using a target voice.

    Both arguments are wrapped in an ``RVCInput`` and handed to the
    module-level ``inpainter``; whatever ``inpainter.rvc`` returns is passed
    back to the caller unchanged.

    Args:
        rvc_source_speech: Value used as ``RVCInput.source_speech``.
            Presumably a file path to the audio to convert -- confirm
            against the caller.
        rvc_target_voice: Value used as ``RVCInput.target_voice``.
            Presumably a file path to a sample of the desired voice --
            confirm against the caller.

    Returns:
        The result of ``inpainter.rvc`` for the constructed request.
    """
    conversion_request = RVCInput(
        source_speech=rvc_source_speech,
        target_voice=rvc_target_voice,
    )
    return inpainter.rvc(conversion_request)
|
74 |
+
|
75 |
if __name__ == '__main__':
|
76 |
with gr.Blocks(analytics_enabled=False, title="PlayDiffusion") as demo:
|
77 |
gr.Markdown("## PlayDiffusion")
|
|
|
126 |
outputs=[tts_output]
|
127 |
)
|
128 |
|
129 |
+
with gr.Tab("Voice Conversion"):
|
130 |
+
gr.Markdown("### Real Time Voice Conversion (works best for english)")
|
131 |
+
rvc_source_speech = gr.Audio(label="Source Conversion Speech",
|
132 |
+
sources=["upload", "microphone"], type="filepath",
|
133 |
+
)
|
134 |
+
rvc_target_voice = gr.Audio(label="Target Voice",
|
135 |
+
sources=["upload", "microphone"], type="filepath",
|
136 |
+
)
|
137 |
+
rvc_submit = gr.Button("Real time Voice Conversion")
|
138 |
+
rvc_output = gr.Audio(label="Converted Speech")
|
139 |
+
|
140 |
+
rvc_submit.click(
|
141 |
+
speech_rvc,
|
142 |
+
inputs=[rvc_source_speech, rvc_target_voice],
|
143 |
+
outputs=[rvc_output]
|
144 |
+
)
|
145 |
+
|
146 |
demo.launch(share=True)
|