Spaces:
Running
Running
import argparse | |
import logging | |
import sys | |
import time | |
import wave | |
from pathlib import Path | |
from typing import Any, Dict | |
from . import PiperVoice | |
from .download import ensure_voice_exists, find_voice, get_voices | |
# Location of this module on disk; the directory and logger name derive from it.
_FILE = Path(__file__)
_DIR = _FILE.parent

# Logger named after the module file's stem (file name without extension).
_LOGGER = logging.getLogger(_FILE.stem)
def _build_parser() -> argparse.ArgumentParser:
    """Build the argument parser for the command-line interface.

    Most multi-word options are registered under both a dashed and an
    underscored spelling; ``--update-voices`` and ``--debug`` have one form.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-m", "--model", required=True, help="Path to Onnx model file")
    parser.add_argument("-c", "--config", help="Path to model config file")

    # Output destination
    parser.add_argument(
        "-f",
        "--output-file",
        "--output_file",
        help="Path to output WAV file (default: stdout)",
    )
    parser.add_argument(
        "-d",
        "--output-dir",
        "--output_dir",
        help="Path to output directory (default: cwd)",
    )
    parser.add_argument(
        "--output-raw",
        "--output_raw",
        action="store_true",
        help="Stream raw audio to stdout",
    )

    # Synthesis settings (left as None when not given on the command line)
    parser.add_argument("-s", "--speaker", type=int, help="Id of speaker (default: 0)")
    parser.add_argument(
        "--length-scale", "--length_scale", type=float, help="Phoneme length"
    )
    parser.add_argument(
        "--noise-scale", "--noise_scale", type=float, help="Generator noise"
    )
    parser.add_argument(
        "--noise-w", "--noise_w", type=float, help="Phoneme width noise"
    )

    # Hardware
    parser.add_argument("--cuda", action="store_true", help="Use GPU")

    # Pauses
    parser.add_argument(
        "--sentence-silence",
        "--sentence_silence",
        type=float,
        default=0.0,
        help="Seconds of silence after each sentence",
    )

    # Voice lookup / download locations
    parser.add_argument(
        "--data-dir",
        "--data_dir",
        action="append",
        # NOTE: per the argparse docs, "append" adds command-line values after
        # the default entries, so the current directory stays first in the list.
        default=[str(Path.cwd())],
        help="Data directory to check for downloaded models (default: current directory)",
    )
    parser.add_argument(
        "--download-dir",
        "--download_dir",
        help="Directory to download voices into (default: first data dir)",
    )
    parser.add_argument(
        "--update-voices",
        action="store_true",
        help="Download latest voices.json during startup",
    )

    # Diagnostics
    parser.add_argument(
        "--debug", action="store_true", help="Print DEBUG messages to console"
    )
    return parser


def _non_empty_lines(stream):
    """Yield each line of *stream* with surrounding whitespace stripped, skipping blank lines."""
    for raw_line in stream:
        stripped = raw_line.strip()
        if stripped:
            yield stripped


def main() -> None:
    """Run the command-line synthesizer.

    Parses the command line, makes sure the requested voice is available
    (downloading it when ``--model`` is not an existing path), loads it, and
    then synthesizes text read from standard input in one of three modes:

    * ``--output-raw``: each non-empty input line is synthesized and the raw
      audio chunks are written to stdout as they are produced.
    * ``--output-dir``: each non-empty input line becomes its own WAV file in
      that directory, named after ``time.monotonic_ns()``.
    * otherwise: the whole of stdin is synthesized into a single WAV written
      to ``--output-file``, or to stdout when that is unset or ``-``.
    """
    args = _build_parser().parse_args()
    logging.basicConfig(level=logging.DEBUG if args.debug else logging.INFO)
    _LOGGER.debug(args)

    if not args.download_dir:
        # Download to first data directory by default
        args.download_dir = args.data_dir[0]

    # Download voice if file doesn't exist
    model_path = Path(args.model)
    if not model_path.exists():
        # Load voice info
        voices_info = get_voices(args.download_dir, update_voices=args.update_voices)

        # Resolve aliases for backwards compatibility with old voice names
        aliases_info: Dict[str, Any] = {}
        for voice_info in voices_info.values():
            for voice_alias in voice_info.get("aliases", []):
                aliases_info[voice_alias] = {"_is_alias": True, **voice_info}

        voices_info.update(aliases_info)
        ensure_voice_exists(args.model, args.data_dir, args.download_dir, voices_info)

        # The model argument was not a file path, so resolve it to model/config paths
        args.model, args.config = find_voice(args.model, args.data_dir)

    # Load voice
    voice = PiperVoice.load(args.model, config_path=args.config, use_cuda=args.cuda)

    synthesize_args = {
        "speaker_id": args.speaker,
        "length_scale": args.length_scale,
        "noise_scale": args.noise_scale,
        "noise_w": args.noise_w,
        "sentence_silence": args.sentence_silence,
    }

    if args.output_raw:
        # Read line-by-line and write raw audio to stdout as it's produced
        raw_out = sys.stdout.buffer
        for line in _non_empty_lines(sys.stdin):
            for audio_bytes in voice.synthesize_stream_raw(line, **synthesize_args):
                raw_out.write(audio_bytes)
                # Flush per chunk so a downstream consumer is not left waiting
                raw_out.flush()
    elif args.output_dir:
        output_dir = Path(args.output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Read line-by-line, one WAV file per non-empty line
        for line in _non_empty_lines(sys.stdin):
            wav_path = output_dir / f"{time.monotonic_ns()}.wav"
            with wave.open(str(wav_path), "wb") as wav_file:
                voice.synthesize(line, wav_file, **synthesize_args)

            _LOGGER.info("Wrote %s", wav_path)
    else:
        # Read entire input
        text = sys.stdin.read()

        # wave.open accepts either a file name or a file-like object, so pick
        # the destination first: stdout when no file (or "-") was requested.
        to_stdout = (not args.output_file) or (args.output_file == "-")
        wav_target = sys.stdout.buffer if to_stdout else args.output_file
        with wave.open(wav_target, "wb") as wav_file:
            voice.synthesize(text, wav_file, **synthesize_args)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()