import os
from pathlib import Path
from typing import List, Union

import nls
import requests

# Endpoint that serves the NLS access token as plain text.
_TOKEN_URL = "https://alice-open.oss-cn-zhangjiakou.aliyuncs.com/nls_token.txt"
# WebSocket gateway used for streaming text-to-speech.
_GATEWAY_URL = 'wss://nls-gateway-cn-beijing.aliyuncs.com/ws/v1'
# Upper bound (seconds) for the token request so a stalled server cannot
# block synthesis forever.
_TOKEN_TIMEOUT_S = 10


class CosyVoiceSynthesizer:
    """Synthesize speech for a transcript through the NLS streaming TTS SDK."""

    def __init__(self) -> None:
        # ``None`` when ALIYUN_APP_KEY is unset; the value is handed to the
        # SDK unchanged in ``call``.
        self.app_key = os.environ.get('ALIYUN_APP_KEY')

    def call(
        self,
        save_file: Union[str, Path],
        transcript: str,
        voice: str = "longyuan",
        sample_rate: int = 16000,
    ) -> None:
        """Synthesize ``transcript`` and write the audio to ``save_file``.

        Args:
            save_file: Destination path; opened in binary write mode, so an
                existing file is truncated.
            transcript: Text sent to the synthesizer.
            voice: Voice name forwarded to ``startStreamInputTts``.
            sample_rate: Sample rate forwarded to ``startStreamInputTts``.

        Raises:
            requests.RequestException: If the token cannot be fetched
                (timeout, connection failure, or non-2xx status).
            RuntimeError: If the SDK reports an error through ``on_error``.
        """
        writer = open(save_file, "wb")
        # Errors reported by the SDK; checked after the stream is stopped.
        errors = []

        def write_data(data, *args):
            writer.write(data)

        def raise_error(error, *args):
            # Record first: NOTE(review) the SDK presumably invokes callbacks
            # on its own worker thread, where a raised exception would not
            # reach the caller of ``call`` -- confirm against the SDK docs.
            errors.append(error)
            raise RuntimeError(
                f'Synthesizing speech failed with error: {error}')

        def close_file(*args):
            writer.close()

        try:
            response = requests.get(_TOKEN_URL, timeout=_TOKEN_TIMEOUT_S)
            # Fail loudly instead of using an HTTP error body as the token.
            response.raise_for_status()
            token = response.text.strip()

            sdk = nls.NlsStreamInputTtsSynthesizer(
                url=_GATEWAY_URL,
                token=token,
                appkey=self.app_key,
                on_data=write_data,
                on_error=raise_error,
                on_close=close_file,
            )
            sdk.startStreamInputTts(
                voice=voice, sample_rate=sample_rate, aformat='wav')
            sdk.sendStreamInputTts(transcript)
            sdk.stopStreamInputTts()

            if errors:
                # Surface an error that was only seen inside a callback.
                raise RuntimeError(
                    f'Synthesizing speech failed with error: {errors[0]}')
        except BaseException:
            # On the success path the file is closed by ``on_close``; on any
            # failure that callback may never fire, so close here to avoid
            # leaking the handle. ``close`` is safe to call twice.
            writer.close()
            raise


class CosyVoiceAgent:
    """Generate one speech file per page using ``CosyVoiceSynthesizer``."""

    def __init__(self, config) -> None:
        # ``config["call_cfg"]`` is read in ``call`` and expanded as keyword
        # arguments for ``CosyVoiceSynthesizer.call``.
        self.config = config

    def call(self, pages: List, device: str, save_path: str) -> dict:
        """Synthesize every page into ``save_path`` as ``p1.wav``, ``p2.wav``, ...

        Args:
            pages: Transcripts, one per output file, in order.
            device: Unused by this method; kept for interface compatibility.
            save_path: Directory that receives the generated files. It is
                created (including parents) when missing.

        Returns:
            ``{"modality": "speech"}``.
        """
        save_path = Path(save_path)
        # Without this the first ``open`` fails when the directory is absent.
        save_path.mkdir(parents=True, exist_ok=True)

        generation_agent = CosyVoiceSynthesizer()
        for page_number, page in enumerate(pages, start=1):
            generation_agent.call(
                save_file=save_path / f"p{page_number}.wav",
                transcript=page,
                **self.config["call_cfg"]
            )
        return {
            "modality": "speech"
        }