# Hugging Face Space app (Kokoro TTS demo). Non-code scrape residue removed:
# the original page header carried the Space runtime status ("Runtime error"),
# a file size of 3,410 bytes, and the per-line commit-hash / line-number gutters.
# Imports
import gradio as gr
import spaces
import torch
import numpy as np
from kokoro import KModel, KPipeline
# Pre-Initialize
# Resolve the compute device once at startup; "auto" defers the choice to
# whatever hardware is actually present.
DEVICE = "auto"
if DEVICE == "auto":
    cuda_present = torch.cuda.is_available()
    DEVICE = "cuda" if cuda_present else "cpu"
print(f"[SYSTEM] | Using {DEVICE} type compute device.")
# Variables
SILENT_THRESHOLD = 0.01  # amplitude below this is treated as silence by trim_silence()
CHAR_LIMIT = 2000        # hard cap on input text length (characters)
DEFAULT_INPUT = ""
DEFAULT_VOICE = "af_heart"
# Display label -> Kokoro voice id. The id's first character is the language
# code ("a"/"b" — presumably American/British English per Kokoro's naming; verify
# against the kokoro package), used below to pick the matching pipeline.
CHOICES = {
    "πΊπΈ πΊ Heart β€οΈ": "af_heart",
    "πΊπΈ πΊ Bella π₯": "af_bella",
    "πΊπΈ πΊ Nicole π§": "af_nicole",
    "πΊπΈ πΊ Aoede": "af_aoede",
    "πΊπΈ πΊ Kore": "af_kore",
    "πΊπΈ πΊ Sarah": "af_sarah",
    "πΊπΈ πΊ Nova": "af_nova",
    "πΊπΈ πΊ Sky": "af_sky",
    "πΊπΈ πΊ Alloy": "af_alloy",
    "πΊπΈ πΊ Jessica": "af_jessica",
    "πΊπΈ πΊ River": "af_river",
    "πΊπΈ πΉ Michael": "am_michael",
    "πΊπΈ πΉ Fenrir": "am_fenrir",
    "πΊπΈ πΉ Puck": "am_puck",
    "πΊπΈ πΉ Echo": "am_echo",
    "πΊπΈ πΉ Eric": "am_eric",
    "πΊπΈ πΉ Liam": "am_liam",
    "πΊπΈ πΉ Onyx": "am_onyx",
    "πΊπΈ πΉ Santa": "am_santa",
    "πΊπΈ πΉ Adam": "am_adam",
    "π¬π§ πΊ Emma": "bf_emma",
    "π¬π§ πΊ Isabella": "bf_isabella",
    "π¬π§ πΊ Alice": "bf_alice",
    "π¬π§ πΊ Lily": "bf_lily",
    "π¬π§ πΉ George": "bm_george",
    "π¬π§ πΉ Fable": "bm_fable",
    "π¬π§ πΉ Lewis": "bm_lewis",
    "π¬π§ πΉ Daniel": "bm_daniel",
}
# One G2P pipeline per language code; model=False because the shared MODEL
# below performs the actual synthesis.
PIPELINES = {lang: KPipeline(lang_code=lang, model=False) for lang in "ab"}
# Pin the pronunciation of the brand name "kokoro" in each lexicon.
PIPELINES["a"].g2p.lexicon.golds["kokoro"] = "kΛOkΙΙΉO"
PIPELINES["b"].g2p.lexicon.golds["kokoro"] = "kΛQkΙΙΉQ"
# Pre-load every voice pack so first inference is not delayed; v[0] selects
# the pipeline whose language matches the voice id.
for v in CHOICES.values():
    PIPELINES[v[0]].load_voice(v)
# Shared synthesis model in eval mode. NOTE(review): it is never moved to
# DEVICE here — confirm whether KModel handles placement internally.
MODEL = KModel().eval()
# Minimal CSS: narrow the app, center the title, hide the Gradio footer.
css = '''
.gradio-container{max-width: 560px !important}
h1{text-align:center}
footer {
visibility: hidden
}
'''
# Functions
def trim_silence(audio, threshold=SILENT_THRESHOLD):
    """Trim leading and trailing low-amplitude samples from a 1-D waveform.

    Samples whose absolute value is <= ``threshold`` at either end are cut;
    interior quiet stretches are kept. If nothing exceeds the threshold the
    input is returned unchanged.
    """
    loud = np.flatnonzero(np.abs(audio) > threshold)
    if loud.size == 0:
        return audio
    return audio[loud[0]:loud[-1] + 1]
def generate(text=DEFAULT_INPUT, voice=DEFAULT_VOICE, speed=1):
    """Synthesize speech for ``text`` with the given Kokoro voice.

    Parameters: text (str, truncated to CHAR_LIMIT), voice (Kokoro voice id,
    e.g. "af_heart"), speed (float multiplier). Returns a (sample_rate,
    np.ndarray) tuple suitable for gr.Audio.

    Fix: the original returned from inside the first loop iteration, silently
    discarding every synthesized segment after the first even though inputs up
    to CHAR_LIMIT characters are accepted; it also fell through to an implicit
    None when the pipeline yielded nothing. All segments are now concatenated,
    and an empty result yields a zero-length waveform instead of None.
    """
    text = text.strip()[:CHAR_LIMIT] + "."
    # The voice id's first character selects the matching language pipeline.
    pipeline = PIPELINES[voice[0]]
    pack = pipeline.load_voice(voice)
    segments = []
    for _, ps, _ in pipeline(text, voice, speed):
        # Reference style vector is indexed by phoneme-sequence length.
        ref_s = pack[len(ps) - 1]
        audio = MODEL(ps, ref_s, speed)
        segments.append(audio.numpy())
    if not segments:
        return (24000, np.zeros(0, dtype=np.float32))
    return (24000, trim_silence(np.concatenate(segments)))
def cloud():
    """Log a keep-alive ping; wired to the maintenance button in the UI."""
    print("[CLOUD] | Space maintained.")
@spaces.GPU()
def gpu():
    # No-op wrapped in spaces.GPU so the Space declares a GPU entry point
    # (presumably for ZeroGPU allocation/keep-alive — TODO confirm); it is
    # never wired into the UI below.
    return
# Initialize
# Build the Gradio UI: text/voice/speed inputs, a synthesize button wired to
# generate(), a maintenance button wired to cloud(), and an audio output.
# Fixes: removed the stray trailing "|" after main.launch(...) (a scrape
# artifact that is a syntax error), and renamed the Textbox variable from
# "input" (shadowed the builtin) to "text_input".
with gr.Blocks(css=css) as main:
    with gr.Column():
        gr.Markdown("πͺ Instantly generate realistic voices using text input.")
    with gr.Column():
        text_input = gr.Textbox(lines=1, value=DEFAULT_INPUT, label="Input")
        voice_input = gr.Dropdown(list(CHOICES.items()), value=DEFAULT_VOICE, label="Voice")
        speed_input = gr.Slider(minimum=0.5, maximum=2, value=1, step=0.1, label="Speed")
        submit = gr.Button("βΆ")
        maintain = gr.Button("βοΈ")
    with gr.Column():
        output = gr.Audio(label="Output")
    submit.click(fn=generate, inputs=[text_input, voice_input, speed_input], outputs=output)
    # queue=False: maintenance ping should not wait behind synthesis jobs.
    maintain.click(cloud, inputs=[], outputs=[], queue=False)
main.launch(show_api=True)