Kokoro-TTS

Running

App Files Community

ishworrsubedii committed on Feb 23

Commit

cc180a5

verified ·

1 Parent(s): a66602a

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -15

app.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import spaces
 from kokoro import KModel, KPipeline
 import gradio as gr
 import os
@@ -10,17 +9,18 @@ CHAR_LIMIT = None if IS_DUPLICATE else 5000
 CUDA_AVAILABLE = torch.cuda.is_available()
 models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False] + ([True] if CUDA_AVAILABLE else [])}
-pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in 'ab'}
 pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
 pipelines['b'].g2p.lexicon.golds['kokoro'] = 'kˈQkəɹQ'
-@spaces.GPU(duration=30)
 def forward_gpu(ps, ref_s, speed):
     return models[True](ps, ref_s, speed)
 def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
     text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
-    pipeline = pipelines[voice[0]]
     pack = pipeline.load_voice(voice)
     use_gpu = use_gpu and CUDA_AVAILABLE
     for _, ps, _ in pipeline(text, voice, speed):
@@ -125,9 +125,11 @@ CHOICES = {
 '🇯🇵 🚹 Alpha':'jm_alpha',
 '🇬🇧 🚹 Emma': 'bf_emma'
 }
 for v in CHOICES.values():
-    pipelines[v[0]].load_voice(v)
 TOKEN_NOTE = '''
 💡 Customize pronunciation with Markdown link syntax and /slashes/ like `[Kokoro](/kˈOkəɹO/)`
@@ -163,18 +165,10 @@ with gr.Blocks() as stream_tab:
         gr.Markdown(STREAM_NOTE)
         gr.DuplicateButton()
-BANNER_TEXT = '''
-[***Kokoro*** **is an open-weight TTS model with 82 million parameters.**](https://huggingface.co/hexgrad/Kokoro-82M)
-As of January 31st, 2025, Kokoro was the most-liked [**TTS model**](https://huggingface.co/models?pipeline_tag=text-to-speech&sort=likes) and the most-liked [**TTS space**](https://huggingface.co/spaces?sort=likes&search=tts) on Hugging Face.
-This demo only showcases English, but you can directly use the model to access other languages.
-'''
 API_OPEN = os.getenv('SPACE_ID') != 'hexgrad/Kokoro-TTS'
 API_NAME = None if API_OPEN else False
 with gr.Blocks() as app:
-    with gr.Row():
-        gr.Markdown(BANNER_TEXT, container=True)
     with gr.Row():
         with gr.Column():
             text = gr.Textbox(label='Input Text', info=f"Up to ~500 characters per Generate, or {'∞' if CHAR_LIMIT is None else CHAR_LIMIT} characters per Stream")

 from kokoro import KModel, KPipeline
 import gradio as gr
 import os
 CUDA_AVAILABLE = torch.cuda.is_available()
 models = {gpu: KModel().to('cuda' if gpu else 'cpu').eval() for gpu in [False] + ([True] if CUDA_AVAILABLE else [])}
+pipelines = {lang_code: KPipeline(lang_code=lang_code, model=False) for lang_code in ['a', 'b', 'hi']}
 pipelines['a'].g2p.lexicon.golds['kokoro'] = 'kˈOkəɹO'
 pipelines['b'].g2p.lexicon.golds['kokoro'] = 'kˈQkəɹQ'
 def forward_gpu(ps, ref_s, speed):
     return models[True](ps, ref_s, speed)
 def generate_first(text, voice='af_heart', speed=1, use_gpu=CUDA_AVAILABLE):
     text = text if CHAR_LIMIT is None else text.strip()[:CHAR_LIMIT]
+    pipeline = pipelines.get(voice[:2], pipelines['a'])
     pack = pipeline.load_voice(voice)
     use_gpu = use_gpu and CUDA_AVAILABLE
     for _, ps, _ in pipeline(text, voice, speed):
 '🇯🇵 🚹 Alpha':'jm_alpha',
 '🇬🇧 🚹 Emma': 'bf_emma'
 }
 for v in CHOICES.values():
+    lang_code = v.split('_')[0]
+    if lang_code in pipelines:
+        pipelines[lang_code].load_voice(v)
 TOKEN_NOTE = '''
 💡 Customize pronunciation with Markdown link syntax and /slashes/ like `[Kokoro](/kˈOkəɹO/)`
         gr.Markdown(STREAM_NOTE)
         gr.DuplicateButton()
 API_OPEN = os.getenv('SPACE_ID') != 'hexgrad/Kokoro-TTS'
 API_NAME = None if API_OPEN else False
 with gr.Blocks() as app:
     with gr.Row():
         with gr.Column():
             text = gr.Textbox(label='Input Text', info=f"Up to ~500 characters per Generate, or {'∞' if CHAR_LIMIT is None else CHAR_LIMIT} characters per Stream")