# File size: 4,351 Bytes
# c005bf8
import requests
import os
import io
from os import path
from typing import Dict, Literal, TypedDict, Optional
import argparse
import asyncio
import base64
# Environment variables
# Credentials and base URL for the TTS service, read once at import time.
TTS_CLIENT_ID = os.environ.get('TTS_CLIENT_ID')
TTS_CLIENT_SECRET = os.environ.get('TTS_CLIENT_SECRET')
TTS_API_URL = os.environ.get('TTS_API_URL')

# Fail fast when any setting is unset or empty, and name the offending
# variables so the operator does not have to guess which one is missing.
_missing_env = [
    name
    for name, value in (
        ('TTS_CLIENT_ID', TTS_CLIENT_ID),
        ('TTS_CLIENT_SECRET', TTS_CLIENT_SECRET),
        ('TTS_API_URL', TTS_API_URL),
    )
    if not value
]
if _missing_env:
    raise ValueError(
        f"Missing environment variables: {', '.join(_missing_env)}"
    )
class TaskResult(TypedDict):
    """Shape of the JSON document returned when polling a TTS task."""

    # Identifier of the task this result belongs to.
    task_id: str
    # Message accompanying the result; printed by the CLI when the task fails.
    message: str
    # Task state; anything other than 'PENDING' ends the polling loop.
    status: Literal['PENDING', 'SUCCESS', 'FAILED']
    # base64 encoded wav audio
    audio_url: str
class Voice(TypedDict):
    """Configuration of one selectable voice."""

    # Display name printed by the CLI.
    name: str
    # Text sent as the `prompt_text` field of the TTS request.
    promptText: str
    # Filesystem path of the prompt audio file uploaded with the request.
    promptAudio: str
# Directory containing this module; prompt audio paths are resolved against it.
_MODULE_DIR = path.dirname(__file__)

# Registry of available voices, keyed by the identifier accepted by --voice.
voices: Dict[str, Voice] = {
    "mk_girl": {
        "name": "👧 凱婷",
        "promptText": "我決定咗啦,我要做一件到目前為止又或者永遠都唔會再見到我做嘅事。",
        "promptAudio": path.join(_MODULE_DIR, "./voices/mk_girl.wav"),
    },
    "doraemon": {
        "name": "🥸 全叔",
        "promptText": "各位觀眾大家好,我叮噹呢又同你哋見面啦。好多謝咁多年嚟各位嘅捧場同支持。",
        "promptAudio": path.join(_MODULE_DIR, "./voices/doraemon3.wav"),
    },
    "周星馳": {
        "name": "😈 星爺",
        "promptText": "大家好啊,想唔想同我做好朋友啊。",
        "promptAudio": path.join(_MODULE_DIR, "./voices/sing.mp3"),
    },
}
async def tts(input_text: str, voice: Voice) -> str:
    """
    Send TTS request with voice information

    The blocking HTTP upload runs in the event loop's default executor so
    other coroutines keep running while the request is in flight.

    Args:
        input_text: Text to be converted to speech
        voice: Voice configuration
    Returns:
        task_id: ID of the TTS task
    Raises:
        OSError: If the prompt audio file cannot be opened.
        requests.HTTPError: If the service answers with an error status.
    """
    headers = {
        'CF-Access-Client-Id': TTS_CLIENT_ID,
        'CF-Access-Client-Secret': TTS_CLIENT_SECRET,
    }

    def _submit() -> str:
        # Open the prompt audio in a context manager so the handle is
        # closed even when the upload raises.
        with open(voice['promptAudio'], 'rb') as prompt_audio:
            files = {
                # (None, value) entries carry no filename in the multipart body.
                'input_text': (None, input_text),
                'prompt_text': (None, voice['promptText']),
                'audio': ('prompt.wav', prompt_audio),
                'speed': (None, '1.0'),
            }
            response = requests.post(f"{TTS_API_URL}/api/tts",
                                     files=files,
                                     headers=headers)
        response.raise_for_status()
        return response.json()['task_id']

    # requests is synchronous; run it off the event loop thread.
    return await asyncio.get_running_loop().run_in_executor(None, _submit)
async def get_task_result(task_id: str) -> TaskResult:
    """
    Get result of TTS task

    The blocking HTTP request runs in the event loop's default executor so
    polling does not stall other coroutines.

    Args:
        task_id: ID of the TTS task
    Returns:
        Task result information
    Raises:
        requests.HTTPError: If the service answers with an error status.
    """
    headers = {
        'Content-Type': 'application/json',
        'CF-Access-Client-Id': TTS_CLIENT_ID,
        'CF-Access-Client-Secret': TTS_CLIENT_SECRET,
    }

    def _fetch() -> TaskResult:
        response = requests.get(f"{TTS_API_URL}/api/tts/{task_id}",
                                headers=headers)
        response.raise_for_status()
        return response.json()

    # requests is synchronous; run it off the event loop thread.
    return await asyncio.get_running_loop().run_in_executor(None, _fetch)
async def main():
    """
    Command-line entry point: synthesize speech and save it to a file.

    Parses --text, --voice and --output, submits the TTS request, polls the
    task once per second until its status is no longer 'PENDING', then
    writes the decoded audio to the output path or prints the failure
    message. Any exception raised along the way is reported on stdout.
    """
    parser = argparse.ArgumentParser(description='Text-to-Speech with CosyVoice')
    # --text is required: without it the request would be sent with no text.
    parser.add_argument('--text', required=True,
                        help='Text to convert to speech')
    parser.add_argument('--voice', '-v', choices=list(voices), default='mk_girl',
                        help='Voice to use for synthesis')
    parser.add_argument('--output', '-o', default='output.wav',
                        help='Output audio file path')
    args = parser.parse_args()

    voice = voices[args.voice]
    print(f"Converting text to speech using voice: {voice['name']}")
    print(f"Text: {args.text}")

    try:
        task_id = await tts(args.text, voice)
        print(f"TTS request submitted. Task ID: {task_id}")

        # Poll until the task leaves the PENDING state.
        while True:
            result = await get_task_result(task_id)
            if result['status'] != 'PENDING':
                break
            print("Waiting for TTS processing...")
            await asyncio.sleep(1)

        if result['status'] == 'SUCCESS':
            audio_data = result['audio_url']
            # Drop any prefix up to the first comma (e.g. a data-URL header);
            # the base64 alphabet itself contains no commas.
            if ',' in audio_data:
                audio_data = audio_data.split(',', 1)[1]
            with open(args.output, 'wb') as f:
                f.write(base64.b64decode(audio_data))
            print(f"Audio saved to {args.output}")
        else:
            print(f"TTS generation failed: {result['message']}")
    except Exception as e:
        # Top-level CLI boundary: report the problem instead of a traceback.
        print(f"Error: {str(e)}")
if __name__ == "__main__":
    # Run the CLI coroutine on a fresh event loop when executed as a script.
    asyncio.run(main())