# Hugging Face Space: hon9kon9ize/Cantonese-TTS-playground (status: Running)
# File size: 4,351 Bytes
# Revision: c005bf8

import requests
import os
import io
from os import path
from typing import Dict, Literal, TypedDict, Optional
import argparse
import asyncio
import base64

# Service credentials and endpoint, read once from the environment at import time.
TTS_CLIENT_ID = os.getenv('TTS_CLIENT_ID')
TTS_CLIENT_SECRET = os.getenv('TTS_CLIENT_SECRET')
TTS_API_URL = os.getenv('TTS_API_URL')

# Refuse to load with an incomplete configuration; an unset variable and an
# empty one are both treated as missing.
if not all((TTS_CLIENT_ID, TTS_CLIENT_SECRET, TTS_API_URL)):
    raise ValueError('Missing environment variables')

class TaskResult(TypedDict):
    """Shape of the JSON payload that get_task_result() returns for a TTS task."""
    # Identifier of the TTS task.
    task_id: str
    # Message from the service; main() prints it when the task did not succeed.
    message: str
    # Task state; main() keeps polling while this is 'PENDING'.
    status: Literal['PENDING', 'SUCCESS', 'FAILED']
    # Base64-encoded wav audio. main() also tolerates a "<prefix>,<base64>"
    # form by keeping only the part after the comma.
    audio_url: str

class Voice(TypedDict):
    """Configuration of one selectable voice: a label plus its reference prompt."""
    # Human-readable label, printed by main() when announcing the chosen voice.
    name: str
    # Text that tts() sends as the 'prompt_text' form field.
    promptText: str
    # Filesystem path of the audio file that tts() uploads as the 'audio' field.
    promptAudio: str

# Directory containing this module; prompt audio paths are resolved against it.
_MODULE_DIR = path.dirname(__file__)

# Registry of the available voices, keyed by the identifier accepted by --voice.
voices: Dict[str, Voice] = {
    "mk_girl": Voice(
        name="👧 凱婷",
        promptText="我決定咗啦，我要做一件到目前為止又或者永遠都唔會再見到我做嘅事。",
        promptAudio=path.join(_MODULE_DIR, "./voices/mk_girl.wav"),
    ),
    "doraemon": Voice(
        name="🥸 全叔",
        promptText="各位觀眾大家好，我叮噹呢又同你哋見面啦。好多謝咁多年嚟各位嘅捧場同支持。",
        promptAudio=path.join(_MODULE_DIR, "./voices/doraemon3.wav"),
    ),
    "周星馳": Voice(
        name="😈 星爺",
        promptText="大家好啊，想唔想同我做好朋友啊。",
        promptAudio=path.join(_MODULE_DIR, "./voices/sing.mp3"),
    ),
}

async def tts(input_text: str, voice: Voice) -> str:
    """
    Send TTS request with voice information

    The text, the voice's prompt text and the voice's prompt audio file are
    posted as a multipart form to ``{TTS_API_URL}/api/tts`` together with the
    access credentials.

    Args:
        input_text: Text to be converted to speech
        voice: Voice configuration

    Returns:
        task_id: ID of the TTS task

    Raises:
        OSError: If the prompt audio file cannot be opened.
        requests.HTTPError: If the service answers with an error status.
        KeyError: If the JSON response has no 'task_id' entry.
    """
    headers = {
        'CF-Access-Client-Id': TTS_CLIENT_ID,
        'CF-Access-Client-Secret': TTS_CLIENT_SECRET
    }

    # Open the prompt audio in a context manager so the handle is always
    # closed once the upload finishes (or fails) instead of leaking.
    with open(voice['promptAudio'], 'rb') as prompt_audio:
        files = {
            'input_text': (None, input_text),
            'prompt_text': (None, voice['promptText']),
            # NOTE(review): the upload is always named 'prompt.wav', even for
            # a non-wav prompt file - confirm the service ignores the name.
            'audio': ('prompt.wav', prompt_audio),
            'speed': (None, '1.0')
        }

        # NOTE(review): requests.post is a blocking call inside a coroutine,
        # so the event loop is stalled for the duration of the request.
        response = requests.post(f"{TTS_API_URL}/api/tts",
                                 files=files,
                                 headers=headers)

    response.raise_for_status()
    return response.json()['task_id']

async def get_task_result(task_id: str) -> TaskResult:
    """
    Fetch the current state of a TTS task

    Performs one GET against the task endpoint; it does not poll or wait.

    Args:
        task_id: ID of the TTS task

    Returns:
        The decoded JSON body of the response

    Raises:
        requests.HTTPError: If the service answers with an error status.
    """
    status_url = f"{TTS_API_URL}/api/tts/{task_id}"
    request_headers = {
        'Content-Type': 'application/json',
        'CF-Access-Client-Id': TTS_CLIENT_ID,
        'CF-Access-Client-Secret': TTS_CLIENT_SECRET,
    }

    reply = requests.get(status_url, headers=request_headers)
    reply.raise_for_status()

    payload = reply.json()
    return payload


async def main():
    """
    Command-line entry point: synthesize speech for --text and save it

    Submits the text with the selected voice, polls the task once per second
    until it is no longer 'PENDING', then decodes the returned base64 audio
    and writes it to the --output path. Errors raised along the way are
    reported on stdout rather than propagated.
    """
    parser = argparse.ArgumentParser(description='Text-to-Speech with CosyVoice')
    # --text is mandatory: without it there is nothing to synthesize, so let
    # argparse reject the invocation before any request is sent.
    parser.add_argument('--text', required=True, help='Text to convert to speech')
    parser.add_argument('--voice', '-v', choices=list(voices.keys()), default='mk_girl',
                        help='Voice to use for synthesis')
    parser.add_argument('--output', '-o', default='output.wav',
                        help='Output audio file path')

    args = parser.parse_args()
    if not args.text.strip():
        parser.error('--text must not be empty')
    voice = voices[args.voice]

    print(f"Converting text to speech using voice: {voice['name']}")
    print(f"Text: {args.text}")

    try:
        task_id = await tts(args.text, voice)
        print(f"TTS request submitted. Task ID: {task_id}")

        # Poll until the task reaches a state other than 'PENDING'.
        while True:
            result = await get_task_result(task_id)
            if result['status'] != 'PENDING':
                break
            print("Waiting for TTS processing...")
            await asyncio.sleep(1)

        if result['status'] == 'SUCCESS':
            audio_data = result['audio_url']
            # Keep only what follows the first comma, if any, dropping a
            # "<prefix>," header in front of the base64 payload.
            _, separator, encoded = audio_data.partition(',')
            if separator:
                audio_data = encoded

            # Decode before opening the output file so a malformed payload
            # cannot truncate an existing file or leave an empty one behind.
            audio_bytes = base64.b64decode(audio_data)
            with open(args.output, 'wb') as f:
                f.write(audio_bytes)
            print(f"Audio saved to {args.output}")
        else:
            print(f"TTS generation failed: {result['message']}")
    except Exception as e:
        # Top-level boundary of the CLI: report the failure instead of
        # showing a traceback.
        print(f"Error: {e}")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())