|
import comfy.sample as comfy_sample |
|
import numpy as np |
|
import torch |
|
from kokoro_onnx import Kokoro |
|
import logging |
|
import os |
|
import requests |
|
from tqdm import tqdm |
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Download locations for the ONNX model and the voice table. They are only
# quoted in the error message shown when the files are missing locally;
# nothing in this module fetches them.
MODEL_URL = "https://huggingface.co/thewh1teagle/kokoro-onnx/resolve/main/kokoro-v0_19.onnx"
VOICES_URL = "https://huggingface.co/thewh1teagle/kokoro-onnx/resolve/main/voices.json"
|
|
|
class KokoroTTS:
    """Text-to-speech node backed by the Kokoro ONNX model.

    The model file (``kokoro-v0_19.onnx``) and the voice table
    (``voices.json``) are expected to sit in the same directory as this
    module. Nothing is downloaded automatically; when either file is
    missing, ``generate`` logs where to get it and returns ``(None,)``.
    """

    @classmethod
    def INPUT_TYPES(s):
        """Describe the node inputs: a multiline text and a speaker choice."""
        return {
            "required": {
                "text": (
                    "STRING",
                    {
                        "multiline": True,
                        "default": "Check out BS Labs youtube channel: https://www.youtube.com/channel/UCcYXt5R6tpQgKYxULWYD95Q",
                    },
                ),
                "speaker": (
                    [
                        "af",
                        "af_sarah",
                        "af_bella",
                        "af_nicole",
                        "af_sky",
                        "am_adam",
                        "am_michael",
                        "bf_emma",
                        "bf_isabella",
                        "bm_george",
                        "bm_lewis",
                    ],
                    {"default": "af_sarah"},
                ),
            },
        }

    RETURN_TYPES = ("AUDIO",)
    RETURN_NAMES = ("audio",)

    FUNCTION = "generate"

    CATEGORY = "kokoro"

    def __init__(self):
        """Record where the model files are expected; the model loads lazily."""
        # Filled in by _get_engine() on the first successful load.
        self.kokoro = None
        logger.info("Initializing KokoroTTS class.")

        node_dir = os.path.dirname(os.path.abspath(__file__))

        self.model_path = os.path.join(node_dir, "kokoro-v0_19.onnx")
        self.voices_path = os.path.join(node_dir, "voices.json")

    def _get_engine(self):
        """Return the cached Kokoro engine, constructing it on first use."""
        if self.kokoro is None:
            # Assign only after the constructor succeeds, so a failed load
            # leaves the cache empty and the next call retries.
            self.kokoro = Kokoro(model_path=self.model_path, voices_path=self.voices_path)
        return self.kokoro

    @staticmethod
    def _to_waveform(audio):
        """Convert a NumPy sample array to a float32 tensor with two leading size-1 axes."""
        return torch.from_numpy(audio).unsqueeze(0).unsqueeze(0).float()

    def generate(self, text, speaker):
        """Synthesize ``text`` with the voice named by ``speaker``.

        Args:
            text: The string to speak.
            speaker: Voice identifier passed to ``Kokoro.create`` as ``voice``.

        Returns:
            A one-element tuple. On success the element is a dict with
            ``"waveform"`` (float32 tensor; assuming ``create`` returns a 1-D
            sample array, its shape is ``(1, 1, num_samples)``) and
            ``"sample_rate"``. On any failure (missing files, engine load
            error, synthesis error, or no audio) the problem is logged and
            ``(None,)`` is returned instead of raising.
        """
        # The files are re-checked on every call, even when an engine is
        # already cached, so a removed file is still reported.
        if not (os.path.exists(self.model_path) and os.path.exists(self.voices_path)):
            logger.error(f"ERROR: model or voice file not found. Please download them manually from {MODEL_URL} and {VOICES_URL} and place them in the same folder as the node. model_path: {self.model_path}, voices_path: {self.voices_path}")
            return (None,)

        try:
            # Reuse the engine across calls instead of reloading the model.
            kokoro = self._get_engine()
        except Exception as e:
            logger.error(f"ERROR: could not load kokoro-onnx in generate: {e}")
            return (None,)

        try:
            audio, sample_rate = kokoro.create(text, voice=speaker, speed=1.0, lang="en-us")
        except Exception as e:
            logger.error(f"ERROR: could not generate speech using kokoro.create. Error: {e}")
            return (None,)

        if audio is None:
            logger.error("ERROR: the text-to-speech generation did not return audio. Make sure you have a valid text string.")
            return (None,)

        audio_tensor = self._to_waveform(audio)

        logger.info(f"Successfuly generated audio. Audio shape: {audio_tensor.shape}. Audio length: {len(audio)}")
        return ({"waveform": audio_tensor, "sample_rate": sample_rate},)

    @classmethod
    def IS_CHANGED(cls, text, speaker):
        """Return a value derived from the inputs; it differs when they differ."""
        return hash((text, speaker))
|
|
|
# Maps the node's registration key to its implementing class.
# NOTE(review): presumably read by the ComfyUI custom-node loader; confirm.
NODE_CLASS_MAPPINGS = {
    "Kokoro TTS": KokoroTTS,
}