# Imports
import gradio as gr
import spaces
import torch
import numpy as np
from kokoro import KModel, KPipeline
# Pre-Initialize
DEVICE = "auto"
if DEVICE == "auto":
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"[SYSTEM] | Using {DEVICE} type compute device.")
torch.set_num_threads(4)
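# Limiting intra-op threads keeps CPU inference predictable on shared Spaces
# hardware; four threads is a conservative default here, not a tuned value.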
# Variables
CHAR_LIMIT = 2000
DEFAULT_INPUT = ""
DEFAULT_VOICE = "af_heart"
CHOICES = {
"πΊπΈ πΊ Heart β€οΈ": "af_heart",
"πΊπΈ πΊ Bella π₯": "af_bella",
"πΊπΈ πΊ Nicole π§": "af_nicole",
"πΊπΈ πΊ Aoede": "af_aoede",
"πΊπΈ πΊ Kore": "af_kore",
"πΊπΈ πΊ Sarah": "af_sarah",
"πΊπΈ πΊ Nova": "af_nova",
"πΊπΈ πΊ Sky": "af_sky",
"πΊπΈ πΊ Alloy": "af_alloy",
"πΊπΈ πΊ Jessica": "af_jessica",
"πΊπΈ πΊ River": "af_river",
"πΊπΈ πΉ Michael": "am_michael",
"πΊπΈ πΉ Fenrir": "am_fenrir",
"πΊπΈ πΉ Puck": "am_puck",
"πΊπΈ πΉ Echo": "am_echo",
"πΊπΈ πΉ Eric": "am_eric",
"πΊπΈ πΉ Liam": "am_liam",
"πΊπΈ πΉ Onyx": "am_onyx",
"πΊπΈ πΉ Santa": "am_santa",
"πΊπΈ πΉ Adam": "am_adam",
"π¬π§ πΊ Emma": "bf_emma",
"π¬π§ πΊ Isabella": "bf_isabella",
"π¬π§ πΊ Alice": "bf_alice",
"π¬π§ πΊ Lily": "bf_lily",
"π¬π§ πΉ George": "bm_george",
"π¬π§ πΉ Fable": "bm_fable",
"π¬π§ πΉ Lewis": "bm_lewis",
"π¬π§ πΉ Daniel": "bm_daniel",
}
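# Labels pair a locale flag and a gender symbol with the display name; values
# are Kokoro voice IDs, where the "a"/"b" prefix marks American vs. British
# English and "f"/"m" marks a female vs. male voice.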
PIPELINES = {lang: KPipeline(lang_code=lang, model=False) for lang in "ab"}
PIPELINES["a"].g2p.lexicon.golds["kokoro"] = "kΛOkΙΙΉO"
PIPELINES["b"].g2p.lexicon.golds["kokoro"] = "kΛQkΙΙΉQ"
VOICE_PACKS = {}
for v in CHOICES.values():
    # A voice ID's first character ("a" or "b") selects its language pipeline.
    VOICE_PACKS[v] = PIPELINES[v[0]].load_voice(v)
model_instance = KModel().to(DEVICE).eval()
try:
    # Scripting can fail on models with dynamic control flow, so fall back
    # to the eager-mode model if torch.jit.script raises.
    MODEL = torch.jit.script(model_instance)
except Exception as e:
    print("torch.jit.script failed, using original model:", e)
    MODEL = model_instance
css = '''
.gradio-container { max-width: 560px !important }
h1 { text-align: center }
footer { visibility: hidden }
'''
def trim_silence(audio, threshold=0.001):
    # Strip leading and trailing samples whose amplitude never exceeds the
    # threshold; return the audio unchanged if it is entirely silent.
    abs_audio = np.abs(audio)
    indices = np.where(abs_audio > threshold)[0]
    if len(indices) == 0:
        return audio
    start = indices[0]
    end = indices[-1] + 1
    return audio[start:end]
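# Quick sanity sketch (assumes float audio in [-1, 1]): silence on both ends
# is removed, interior samples are kept untouched, e.g.
#   trim_silence(np.concatenate([np.zeros(100), np.ones(50), np.zeros(100)]))
#   # -> array of shape (50,)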
# Functions
def generate(text=DEFAULT_INPUT, voice=DEFAULT_VOICE, speed=1):
    # Clamp to CHAR_LIMIT and append a period so the input always ends a sentence.
    text = text.strip()[:CHAR_LIMIT] + "."
    pipeline = PIPELINES[voice[0]]
    pack = VOICE_PACKS[voice]
    # Only the first segment the pipeline yields is synthesized and returned.
    for _, ps, _ in pipeline(text, voice, speed):
        # Voice packs index a reference style vector by phoneme-sequence length.
        ref_s = pack[len(ps) - 1]
        audio = MODEL(ps, ref_s, speed)
        # Detach and move to CPU first so .numpy() also works when on CUDA.
        audio_np = audio.detach().cpu().numpy()
        trimmed_audio = trim_silence(audio_np)
        return (24000, trimmed_audio)
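# Example (sketch, not part of the UI): calling generate() directly yields the
# (sample_rate, waveform) tuple that gr.Audio accepts, e.g.
#   sr, wav = generate("Hello from Kokoro.", voice="af_heart", speed=1)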
def cloud():
    print("[CLOUD] | Space maintained.")

# Placeholder @spaces.GPU entry point; it is never called here and appears to
# exist only so ZeroGPU Spaces detect a GPU-decorated function.
@spaces.GPU()
def gpu():
    return
# Initialize
with gr.Blocks(css=css) as main:
    with gr.Column():
        # Named "text_input" to avoid shadowing the built-in input().
        text_input = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Input")
        voice_input = gr.Dropdown(list(CHOICES.items()), value=DEFAULT_VOICE, label="Voice")
        speed_input = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label="Speed")
        submit = gr.Button("▶")
        maintain = gr.Button("☁️")
    with gr.Column():
        output = gr.Audio(label="Output")
    submit.click(fn=generate, inputs=[text_input, voice_input, speed_input], outputs=output)
    maintain.click(cloud, inputs=[], outputs=[], queue=False)

main.launch(show_api=True)