# Spaces: Running on Zero
import spaces | |
from kokoro import KModel, KPipeline | |
import gradio as gr | |
# Maximum number of characters of (stripped) input text that the generate
# functions will synthesize; anything beyond this is cut off.
CHAR_LIMIT = 5000

# One pipeline per language code, keyed by that code. The first character of a
# voice id ("a..." / "b...") is used elsewhere in this file to pick the pipeline.
# model=False: the pipelines are built without their own model; synthesis goes
# through the separately constructed module-level model instead.
PIPELINES = {lang: KPipeline(lang_code=lang, model=False) for lang in "ab"}

# Pin the pronunciation of the word "kokoro" in each pipeline's lexicon.
# The phoneme strings had been corrupted into Greek capitals by a bad
# re-encoding (UTF-8 bytes read through a single-byte Greek codepage); they are
# restored here to the intended IPA-style symbols:
#   U+02C8 (primary stress), U+0259 (schwa), U+0279 (turned r).
PIPELINES["a"].g2p.lexicon.golds["kokoro"] = "kˈOkəɹO"
PIPELINES["b"].g2p.lexicon.golds["kokoro"] = "kˈQkəɹQ"
# Dropdown entries: human-readable label -> Kokoro voice id.
# Voice ids follow the pattern "<language><gender>_<name>"; the first character
# ("a" or "b") is what the rest of the file uses to select a pipeline.
# The labels' emoji (country flag, gender sign, optional decoration) had been
# corrupted by a bad re-encoding and are restored to the intended code points.
CHOICES = {
    "🇺🇸 🚺 Heart ❤️": "af_heart",
    "🇺🇸 🚺 Bella 🔥": "af_bella",
    "🇺🇸 🚺 Nicole 🎧": "af_nicole",
    "🇺🇸 🚺 Aoede": "af_aoede",
    "🇺🇸 🚺 Kore": "af_kore",
    "🇺🇸 🚺 Sarah": "af_sarah",
    "🇺🇸 🚺 Nova": "af_nova",
    "🇺🇸 🚺 Sky": "af_sky",
    "🇺🇸 🚺 Alloy": "af_alloy",
    "🇺🇸 🚺 Jessica": "af_jessica",
    "🇺🇸 🚺 River": "af_river",
    "🇺🇸 🚹 Michael": "am_michael",
    "🇺🇸 🚹 Fenrir": "am_fenrir",
    "🇺🇸 🚹 Puck": "am_puck",
    "🇺🇸 🚹 Echo": "am_echo",
    "🇺🇸 🚹 Eric": "am_eric",
    "🇺🇸 🚹 Liam": "am_liam",
    "🇺🇸 🚹 Onyx": "am_onyx",
    "🇺🇸 🚹 Santa": "am_santa",
    "🇺🇸 🚹 Adam": "am_adam",
    "🇬🇧 🚺 Emma": "bf_emma",
    "🇬🇧 🚺 Isabella": "bf_isabella",
    "🇬🇧 🚺 Alice": "bf_alice",
    "🇬🇧 🚺 Lily": "bf_lily",
    "🇬🇧 🚹 George": "bm_george",
    "🇬🇧 🚹 Fable": "bm_fable",
    "🇬🇧 🚹 Lewis": "bm_lewis",
    "🇬🇧 🚹 Daniel": "bm_daniel",
}
# Load every listed voice once at import time, on the pipeline selected by the
# voice id's first character. The return value is discarded here — presumably
# this warms a per-pipeline voice cache so later load_voice calls are cheap;
# confirm against the kokoro implementation.
for voice_id in CHOICES.values():
    lang_code = voice_id[0]
    PIPELINES[lang_code].load_voice(voice_id)

# Single shared model instance used by the generate functions.
# NOTE(review): .eval() is the torch.nn.Module inference-mode switch, assuming
# KModel is a torch module — confirm.
MODEL = KModel().eval()
def generate_first(text, voice="af_heart", speed=1):
    """Synthesize audio for only the first segment the pipeline yields.

    The input is stripped and truncated to ``CHAR_LIMIT`` characters before
    being handed to the pipeline chosen by the first character of ``voice``.

    Args:
        text: Text to speak.
        voice: Voice id; ``voice[0]`` selects the entry in ``PIPELINES``.
        speed: Forwarded unchanged to both the pipeline and ``MODEL``.

    Returns:
        A ``(24000, samples)`` tuple, where ``samples`` is the model output
        converted with ``.numpy()``, or ``None`` if the pipeline yields
        nothing. (24000 is presumably the sample rate in Hz expected by the
        audio widget — confirm.)
    """
    clipped = text.strip()[:CHAR_LIMIT]
    lang_pipeline = PIPELINES[voice[0]]
    style_pack = lang_pipeline.load_voice(voice)

    # Pull at most one segment; later segments are never requested.
    first_segment = next(iter(lang_pipeline(clipped, voice, speed)), None)
    if first_segment is None:
        return None

    _, phonemes, _ = first_segment
    # The voice pack is indexed by phoneme count minus one to get the
    # reference style passed to the model.
    reference_style = style_pack[len(phonemes) - 1]
    waveform = MODEL(phonemes, reference_style, speed)
    return (24000, waveform.numpy())
def predict(text, voice="af_heart", speed=1):
    """Delegate to ``generate_first`` with the same arguments.

    Returns whatever ``generate_first`` returns. Nothing in the visible part
    of this file calls this wrapper.
    """
    result = generate_first(text, voice=voice, speed=speed)
    return result
def generate_all(text, voice="af_heart", speed=1):
    """Yield synthesized audio for every segment the pipeline produces.

    Generator counterpart of ``generate_first``: the text is stripped and
    truncated to ``CHAR_LIMIT`` characters, then each segment yielded by the
    pipeline (selected by ``voice[0]``) is synthesized in turn.

    Args:
        text: Text to speak.
        voice: Voice id; ``voice[0]`` selects the entry in ``PIPELINES``.
        speed: Forwarded unchanged to both the pipeline and ``MODEL``.

    Yields:
        ``(24000, samples)`` tuples, one per segment, where ``samples`` is the
        model output converted with ``.numpy()``.
    """
    clipped = text.strip()[:CHAR_LIMIT]
    lang_pipeline = PIPELINES[voice[0]]
    style_pack = lang_pipeline.load_voice(voice)

    for segment in lang_pipeline(clipped, voice, speed):
        _, phonemes, _ = segment
        # Reference style is looked up by phoneme count minus one.
        reference_style = style_pack[len(phonemes) - 1]
        waveform = MODEL(phonemes, reference_style, speed)
        yield 24000, waveform.numpy()
def gpu():
    """Do nothing and return ``None``.

    Placeholder with no body logic; nothing in the visible part of this file
    calls it.
    """
    return None
# Gradio UI: text, voice and speed inputs feed generate_first, whose result is
# shown in a non-editable, auto-playing audio widget.
with gr.Blocks() as app:
    # NOTE(review): the original indentation was lost, so the extent of this
    # Row is reconstructed (the three inputs side by side) — confirm layout.
    with gr.Row():
        text_input = gr.Textbox(label="input text")
        # Dropdown choices are (label, voice id) pairs; the default is given
        # as a voice id, not a label.
        voice_input = gr.Dropdown(
            choices=list(CHOICES.items()),
            value="af_heart",
            label="voice",
        )
        speed_input = gr.Slider(
            minimum=0.5,
            maximum=2,
            value=1,
            step=0.1,
            label="speed",
        )
    out_audio = gr.Audio(label="output audio", interactive=False, autoplay=True)
    gen_btn = gr.Button(value="generate")
    gen_btn.click(
        fn=generate_first,
        inputs=[text_input, voice_input, speed_input],
        outputs=out_audio,
    )

# Start the web server only when run as a script, not on import.
if __name__ == "__main__":
    app.launch()