Spaces:
Sleeping
Sleeping
File size: 2,001 Bytes
8035662 d092760 8035662 096bc17 8035662 096bc17 8035662 d092760 8035662 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
from kokoro import KPipeline
import soundfile as sf
import numpy as np
import logging
# Choose the ``get_generator`` implementation once, at import time.
# When the optional ``spaces`` package can be imported, the function is wrapped
# with ``spaces.GPU(duration=60)``; otherwise an undecorated function with the
# same signature is used, so callers never need to know which one is active.
try:
    import spaces

    @spaces.GPU(duration=60)
    def get_generator(pipeline, text, voice, speed, split_pattern):
        """
        Invoke the TTS pipeline and return whatever it returns.

        Args:
            pipeline (callable): Pipeline called as
                ``pipeline(text, voice=..., speed=..., split_pattern=...)``
            text (str): Text to convert to speech
            voice (str): Voice ID to use
            speed (float): Speech speed multiplier
            split_pattern (str): Regex forwarded to the pipeline to split
                the text into segments

        Returns:
            The pipeline's result (iterated by the caller).
        """
        # Forward the caller's pattern instead of a hard-coded one.
        return pipeline(
            text, voice=voice, speed=speed, split_pattern=split_pattern
        )

    logging.info("Running TTS in spaces")
except Exception:
    # Best-effort fallback: ``spaces`` is missing or its decorator could not
    # be applied. ``Exception`` (rather than a bare ``except``) lets
    # KeyboardInterrupt / SystemExit propagate.
    logging.info("Spaces not available")

    def get_generator(pipeline, text, voice, speed, split_pattern):
        """
        Invoke the TTS pipeline and return whatever it returns.

        Args:
            pipeline (callable): Pipeline called as
                ``pipeline(text, voice=..., speed=..., split_pattern=...)``
            text (str): Text to convert to speech
            voice (str): Voice ID to use
            speed (float): Speech speed multiplier
            split_pattern (str): Regex forwarded to the pipeline to split
                the text into segments

        Returns:
            The pipeline's result (iterated by the caller).
        """
        # Forward the caller's pattern instead of a hard-coded one.
        return pipeline(
            text, voice=voice, speed=speed, split_pattern=split_pattern
        )
# Lazily-built pipeline shared by every call that does not pass its own.
_DEFAULT_PIPELINE = None


def _get_default_pipeline():
    """Return the shared default pipeline, constructing it on first use."""
    global _DEFAULT_PIPELINE
    if _DEFAULT_PIPELINE is None:
        _DEFAULT_PIPELINE = KPipeline(lang_code="a")
    return _DEFAULT_PIPELINE


def generate_audio(
    text,
    pipeline=None,
    voice="af_heart",
    speed=1,
    save_segments=False,
    progress=None,
):
    """
    Generate audio from text using Kokoro TTS pipeline

    Args:
        text (str): Text to convert to speech
        pipeline: TTS pipeline to use. When None, a shared
            ``KPipeline(lang_code="a")`` is created on first use and reused
            by later calls.
        voice (str): Voice ID to use
        speed (float): Speech speed multiplier
        save_segments (bool): Whether to save individual audio segments as
            ``segment_<i>.wav`` in the current working directory
        progress: Optional object exposing ``tqdm(iterable, desc=...)``,
            used to report per-segment progress. When None, segments are
            processed without progress reporting.

    Returns:
        numpy.ndarray: Combined audio data at 24kHz sample rate. An empty
        array is returned when the pipeline yields no segments.
    """
    if pipeline is None:
        pipeline = _get_default_pipeline()

    # Best-effort move to the GPU; any failure (no CUDA, or a pipeline object
    # without a ``to`` method) falls back to the pipeline as given.
    try:
        pipeline = pipeline.to("cuda")
    except Exception:
        logging.warning("CUDA not available, using CPU")

    generator = get_generator(pipeline, text, voice, speed, split_pattern=r"\.")

    # Materialise the segments so a progress bar can know the total count.
    segments = list(generator)
    if progress is None:
        iterable = segments
    else:
        iterable = progress.tqdm(segments, desc="Generating audio")

    all_audio = []
    for i, (gs, ps, audio) in enumerate(iterable):
        logging.info("Processing segment")
        logging.info("Graphemes: %s", gs)
        logging.info("Phonemes: %s", ps)
        all_audio.append(audio)
        if save_segments:
            # Segments are written at the same 24 kHz rate as the final output.
            sf.write(f"segment_{i}.wav", audio, 24000)

    if not all_audio:
        # np.concatenate raises on an empty list; return empty audio instead.
        # NOTE(review): float32 is assumed to match the pipeline's audio
        # dtype - confirm.
        logging.warning("No audio segments were generated")
        return np.zeros(0, dtype=np.float32)

    # Concatenate all audio segments
    return np.concatenate(all_audio)
if __name__ == "__main__":
    # Script entry point: synthesise a short sample with the default settings
    # and write the result to "out.wav" at 24 kHz.
    sf.write("out.wav", generate_audio("Hello world"), 24000)
|