# Cheng Jed
# initial commit
# c005bf8
import requests
import os
import io
from os import path
from typing import Dict, Literal, TypedDict, Optional
import argparse
import asyncio
import base64
# Credentials and endpoint of the TTS service, read from the process environment.
TTS_CLIENT_ID = os.getenv('TTS_CLIENT_ID')
TTS_CLIENT_SECRET = os.getenv('TTS_CLIENT_SECRET')
TTS_API_URL = os.getenv('TTS_API_URL')

# Fail fast at import time when any of the three settings is unset or empty.
if not all((TTS_CLIENT_ID, TTS_CLIENT_SECRET, TTS_API_URL)):
    raise ValueError('Missing environment variables')
class TaskResult(TypedDict):
    """Typed view of the JSON object returned when a TTS task is queried."""

    # Identifier of the task this result belongs to.
    task_id: str
    # Message attached to the result; printed by the CLI when the task fails.
    message: str
    # Current state of the task.
    status: Literal['PENDING', 'SUCCESS', 'FAILED']
    # Base64-encoded WAV audio (not a fetchable URL, despite the field name).
    audio_url: str
class Voice(TypedDict):
    """A voice preset: display name plus the prompt text/audio sent with a request."""

    # Human-readable label shown to the user.
    name: str
    # Text sent as the ``prompt_text`` form field of a TTS request.
    promptText: str
    # Filesystem path of the prompt audio file uploaded with a TTS request.
    promptAudio: str
# Built-in voice presets, keyed by the identifier accepted by ``--voice``.
# Each row is (key, display name, prompt text, prompt audio path); the audio
# path is joined onto the directory that contains this file.
voices: Dict[str, Voice] = {
    voice_key: {
        "name": display_name,
        "promptText": prompt_text,
        "promptAudio": path.join(path.dirname(__file__), audio_file),
    }
    for voice_key, display_name, prompt_text, audio_file in (
        (
            "mk_girl",
            "👧 凱婷",
            "我決定咗啦,我要做一件到目前為止又或者永遠都唔會再見到我做嘅事。",
            "./voices/mk_girl.wav",
        ),
        (
            "doraemon",
            "🥸 全叔",
            "各位觀眾大家好,我叮噹呢又同你哋見面啦。好多謝咁多年嚟各位嘅捧場同支持。",
            "./voices/doraemon3.wav",
        ),
        (
            "周星馳",
            "😈 星爺",
            "大家好啊,想唔想同我做好朋友啊。",
            "./voices/sing.mp3",
        ),
    )
}
async def tts(input_text: str, voice: Voice, speed: float = 1.0,
              timeout: float = 60.0) -> str:
    """
    Submit a TTS request together with the voice's prompt text and audio.

    Args:
        input_text: Text to be converted to speech
        voice: Voice configuration; ``promptAudio`` is opened as a file path
        speed: Value sent in the ``speed`` form field (default 1.0, sent as '1.0')
        timeout: Seconds passed to ``requests`` as the request timeout
    Returns:
        task_id: ID of the TTS task
    Raises:
        OSError: If the prompt audio file cannot be opened
        requests.RequestException: On connection errors, timeouts, or an
            HTTP error status
    """
    headers = {
        'CF-Access-Client-Id': TTS_CLIENT_ID,
        'CF-Access-Client-Secret': TTS_CLIENT_SECRET
    }

    def _submit() -> str:
        # The context manager closes the prompt audio file even when the
        # upload fails; previously the handle was never closed.
        with open(voice['promptAudio'], 'rb') as prompt_audio:
            files = {
                # A filename of None makes requests send an ordinary form
                # field instead of a file part.
                'input_text': (None, input_text),
                'prompt_text': (None, voice['promptText']),
                # NOTE(review): the upload is always labelled 'prompt.wav',
                # even for presets whose prompt audio is an .mp3 — confirm
                # the server does not rely on the file extension.
                'audio': ('prompt.wav', prompt_audio),
                'speed': (None, str(speed))
            }
            # requests has no default timeout; without one a stalled server
            # would block this call forever.
            response = requests.post(f"{TTS_API_URL}/api/tts",
                                     files=files,
                                     headers=headers,
                                     timeout=timeout)
        response.raise_for_status()
        return response.json()['task_id']

    # requests is a blocking client: run it in the default executor so the
    # event loop is not stalled while the upload is in flight.
    return await asyncio.get_running_loop().run_in_executor(None, _submit)
async def get_task_result(task_id: str, timeout: float = 30.0) -> TaskResult:
    """
    Fetch the current result of a TTS task.

    Args:
        task_id: ID of the TTS task
        timeout: Seconds passed to ``requests`` as the request timeout
    Returns:
        Task result information, as decoded from the JSON response
    Raises:
        requests.RequestException: On connection errors, timeouts, or an
            HTTP error status
    """
    headers = {
        'Content-Type': 'application/json',
        'CF-Access-Client-Id': TTS_CLIENT_ID,
        'CF-Access-Client-Secret': TTS_CLIENT_SECRET
    }

    def _fetch() -> TaskResult:
        # requests has no default timeout; without one a stalled server
        # would block this call forever.
        response = requests.get(f"{TTS_API_URL}/api/tts/{task_id}",
                                headers=headers,
                                timeout=timeout)
        response.raise_for_status()
        return response.json()

    # requests is a blocking client: run it in the default executor so the
    # event loop is not stalled while waiting for the response.
    return await asyncio.get_running_loop().run_in_executor(None, _fetch)
async def main():
    """
    Command-line entry point: synthesize ``--text`` and save the audio.

    Submits the text with the selected voice, polls the task once per second
    until it leaves the PENDING state (or ``--timeout`` seconds elapse), then
    base64-decodes the returned audio and writes it to ``--output``.

    Raises:
        SystemExit: With status 1 when the task fails, times out, or any
            other error occurs, so shell callers can detect the failure.
    """
    parser = argparse.ArgumentParser(description='Text-to-Speech with CosyVoice')
    # Required: without it None would be submitted, and requests silently
    # drops form fields whose value is None.
    parser.add_argument('--text', required=True, help='Text to convert to speech')
    parser.add_argument('--voice', '-v', choices=list(voices), default='mk_girl',
                        help='Voice to use for synthesis')
    parser.add_argument('--output', '-o', default='output.wav',
                        help='Output audio file path')
    parser.add_argument('--timeout', type=float, default=300.0,
                        help='Maximum seconds to wait for the TTS task to finish')
    args = parser.parse_args()

    voice = voices[args.voice]
    print(f"Converting text to speech using voice: {voice['name']}")
    print(f"Text: {args.text}")

    try:
        task_id = await tts(args.text, voice)
        print(f"TTS request submitted. Task ID: {task_id}")

        # Bound the polling loop so a task stuck in PENDING cannot hang the
        # CLI forever. The loop clock is monotonic.
        loop = asyncio.get_running_loop()
        deadline = loop.time() + args.timeout
        while True:
            result = await get_task_result(task_id)
            if result['status'] != 'PENDING':
                break
            if loop.time() >= deadline:
                raise TimeoutError(
                    f"TTS task {task_id} still pending after {args.timeout:g} seconds")
            print("Waiting for TTS processing...")
            await asyncio.sleep(1)

        if result['status'] != 'SUCCESS':
            print(f"TTS generation failed: {result.get('message', 'no message provided')}")
            # SystemExit is not an Exception subclass, so the handler below
            # does not swallow it.
            raise SystemExit(1)

        # The audio may arrive as a data URL ("<header>,<base64>"); keep only
        # the payload after the first comma when a header is present.
        audio_data = result['audio_url']
        _header, comma, payload = audio_data.partition(',')
        if comma:
            audio_data = payload

        with open(args.output, 'wb') as f:
            f.write(base64.b64decode(audio_data))
        print(f"Audio saved to {args.output}")
    except Exception as e:
        # Top-level boundary: report the error and exit non-zero instead of
        # finishing with a success status.
        print(f"Error: {e}")
        raise SystemExit(1)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())