|
import requests |
|
import os |
|
import io |
|
from os import path |
|
from typing import Dict, Literal, TypedDict, Optional |
|
import argparse |
|
import asyncio |
|
import base64 |
|
|
|
|
|
# Service settings are taken from the process environment. The two client
# values are later sent as the CF-Access-Client-Id / CF-Access-Client-Secret
# request headers, and TTS_API_URL is the base URL the API paths are appended to.
TTS_CLIENT_ID = os.getenv('TTS_CLIENT_ID')
TTS_CLIENT_SECRET = os.getenv('TTS_CLIENT_SECRET')
TTS_API_URL = os.getenv('TTS_API_URL')

# Fail at import time when any setting is unset or empty, so that no request
# is ever built from incomplete configuration.
if not all((TTS_CLIENT_ID, TTS_CLIENT_SECRET, TTS_API_URL)):
    raise ValueError('Missing environment variables')
|
|
|
class TaskResult(TypedDict):
    """JSON payload describing the state of a TTS task.

    This is the declared return type of ``get_task_result``; ``main`` polls
    until ``status`` is no longer ``'PENDING'``.
    """

    # Identifier of the task.
    task_id: str
    # Human-readable detail; printed by ``main`` when the task did not succeed.
    message: str
    # Processing state of the task.
    status: Literal['PENDING', 'SUCCESS', 'FAILED']
    # Consumed by ``main`` as base64 text, optionally preceded by a
    # comma-terminated prefix (presumably a data URL -- confirm with the API).
    audio_url: str
|
|
|
class Voice(TypedDict):
    """Configuration of one selectable voice."""

    # Display label printed by ``main`` when synthesis starts.
    name: str
    # Text sent as the ``prompt_text`` form field (presumably the transcript
    # of the prompt audio -- confirm with the API).
    promptText: str
    # Filesystem path of the audio sample uploaded as the ``audio`` form field.
    promptAudio: str
|
|
|
# Registry of the voices selectable through the ``--voice`` CLI option.
# Each row is (registry key, display name, prompt text, audio path relative
# to this file); the audio path is resolved against this module's directory.
voices: Dict[str, Voice] = {
    voice_key: {
        "name": display_name,
        "promptText": prompt_text,
        "promptAudio": path.join(path.dirname(__file__), audio_rel_path),
    }
    for voice_key, display_name, prompt_text, audio_rel_path in (
        (
            "mk_girl",
            "👧 凱婷",
            "我決定咗啦,我要做一件到目前為止又或者永遠都唔會再見到我做嘅事。",
            "./voices/mk_girl.wav",
        ),
        (
            "doraemon",
            "🥸 全叔",
            "各位觀眾大家好,我叮噹呢又同你哋見面啦。好多謝咁多年嚟各位嘅捧場同支持。",
            "./voices/doraemon3.wav",
        ),
        (
            "周星馳",
            "😈 星爺",
            "大家好啊,想唔想同我做好朋友啊。",
            "./voices/sing.mp3",
        ),
    )
}
|
|
|
async def tts(input_text: str, voice: Voice) -> str:
    """Submit a text-to-speech request and return the ID of the created task.

    The request is a multipart form POST to ``{TTS_API_URL}/api/tts`` carrying
    the text to synthesize, the voice's prompt text, the voice's prompt audio
    file and a fixed speed of ``'1.0'``.

    Args:
        input_text: Text to be converted to speech.
        voice: Voice configuration; ``promptText`` is sent as a form field and
            the file at ``promptAudio`` is uploaded.

    Returns:
        The ``task_id`` field of the JSON response.

    Raises:
        OSError: If the prompt audio file cannot be opened.
        requests.HTTPError: If the server answers with an error status.
        KeyError: If the JSON response has no ``task_id`` field.
    """
    headers = {
        'CF-Access-Client-Id': TTS_CLIENT_ID,
        'CF-Access-Client-Secret': TTS_CLIENT_SECRET,
    }

    # Keep the prompt audio open only for the duration of the upload; the
    # context manager guarantees the handle is closed even if the POST raises.
    with open(voice['promptAudio'], 'rb') as prompt_audio:
        files = {
            # A (None, value) tuple makes requests send a plain form field
            # (no filename) inside the multipart body.
            'input_text': (None, input_text),
            'prompt_text': (None, voice['promptText']),
            # NOTE(review): the upload filename is fixed to 'prompt.wav' even
            # when the prompt file is not a WAV -- confirm the server ignores it.
            'audio': ('prompt.wav', prompt_audio),
            'speed': (None, '1.0'),
        }

        # NOTE: requests is synchronous, so this call blocks the event loop
        # until the server responds.
        response = requests.post(f"{TTS_API_URL}/api/tts",
                                 files=files,
                                 headers=headers)

    response.raise_for_status()
    return response.json()['task_id']
|
|
|
async def get_task_result(task_id: str) -> TaskResult:
    """Fetch the current state of a TTS task.

    Issues a GET to ``{TTS_API_URL}/api/tts/{task_id}`` with the client
    credentials in the request headers.

    Args:
        task_id: ID of the TTS task, as returned by ``tts``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    status_url = f"{TTS_API_URL}/api/tts/{task_id}"

    # NOTE: requests is synchronous, so this call blocks the event loop
    # until the server responds.
    reply = requests.get(
        status_url,
        headers={
            'Content-Type': 'application/json',
            'CF-Access-Client-Id': TTS_CLIENT_ID,
            'CF-Access-Client-Secret': TTS_CLIENT_SECRET,
        },
    )
    reply.raise_for_status()

    payload = reply.json()
    return payload
|
|
|
|
|
async def main():
    """Command-line entry point: synthesize ``--text`` and save the audio.

    Parses the command line, submits the TTS request, polls the task once per
    second until its status is no longer ``'PENDING'``, and on success decodes
    the base64 audio payload into the ``--output`` file. A failed task or any
    exception raised along the way is reported on stdout instead of being
    propagated.
    """
    parser = argparse.ArgumentParser(description='Text-to-Speech with CosyVoice')
    # The text is mandatory: without it ``args.text`` would be None, the form
    # field would be dropped from the request and the job would only fail
    # remotely with a less helpful error.
    parser.add_argument('--text', required=True,
                        help='Text to convert to speech')
    parser.add_argument('--voice', '-v', choices=list(voices.keys()), default='mk_girl',
                        help='Voice to use for synthesis')
    parser.add_argument('--output', '-o', default='output.wav',
                        help='Output audio file path')

    args = parser.parse_args()
    voice = voices[args.voice]

    print(f"Converting text to speech using voice: {voice['name']}")
    print(f"Text: {args.text}")

    # Top-level boundary of the script: report any failure to the user rather
    # than letting a traceback escape.
    try:
        task_id = await tts(args.text, voice)
        print(f"TTS request submitted. Task ID: {task_id}")

        # Poll until the task leaves the PENDING state.
        while True:
            result = await get_task_result(task_id)
            if result['status'] != 'PENDING':
                break
            print("Waiting for TTS processing...")
            await asyncio.sleep(1)

        if result['status'] == 'SUCCESS':
            audio_data = result['audio_url']
            # Drop a leading "<prefix>," if present (presumably a data-URL
            # header -- confirm with the API). Split once so that only the
            # first comma is treated as the delimiter.
            if ',' in audio_data:
                audio_data = audio_data.split(',', 1)[1]

            with open(args.output, 'wb') as f:
                f.write(base64.b64decode(audio_data))
            print(f"Audio saved to {args.output}")
        else:
            print(f"TTS generation failed: {result['message']}")
    except Exception as e:
        print(f"Error: {e}")
|
|
|
# Run the CLI only when this file is executed as a script, not when it is
# imported; asyncio.run drives the main() coroutine to completion.
if __name__ == "__main__":
    asyncio.run(main())