|
import asyncio |
|
import os |
|
import types |
|
import httpx |
|
|
|
from realtime_ai_character.logger import get_logger |
|
from realtime_ai_character.utils import Singleton |
|
from realtime_ai_character.audio.text_to_speech.base import TextToSpeech |
|
|
|
# Module-level logger, named after this module.
logger = get_logger(__name__)

# When True, ElevenLabs.stream() returns immediately without contacting the API.
DEBUG = False

# Static settings for the ElevenLabs text-to-speech client, exposed as
# attributes (config.url, config.headers, ...).
config = types.SimpleNamespace(
    # Voice ids: the generic fallback plus the female/male defaults used for
    # characters that have no voice override.
    default_voice="21m00Tcm4TlvDq8ikWAM",
    default_female_voice="EXAVITQu4vr4xnSDxMaL",
    default_male_voice="ErXwobaYiN019PkySvjV",
    # NOTE(review): chunk_size is not referenced anywhere in the visible code.
    chunk_size=1024,
    # URL template; `{voice_id}` is filled in per request.
    url="https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream",
    headers={
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        # Read at import time: a missing ELEVEN_LABS_API_KEY raises KeyError
        # as soon as this module is imported.
        "xi-api-key": os.environ["ELEVEN_LABS_API_KEY"],
    },
    # Request-body fields merged with the text to synthesize.
    data={
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.75,
        },
    },
)
|
|
|
|
|
class ElevenLabs(Singleton, TextToSpeech):
    """Text-to-speech backend that posts text to the ElevenLabs streaming
    endpoint and forwards the returned audio bytes over a websocket.
    """

    def __init__(self):
        """Build the character-name -> voice-id lookup table.

        Each character's voice can be overridden with an environment
        variable; an unset or empty variable falls back to the default
        male/female voice from ``config``.
        """
        super().__init__()
        logger.info("Initializing [ElevenLabs Text To Speech] voices...")
        self.voice_ids = {
            "Raiden Shogun And Ei": os.environ.get('RAIDEN_VOICE') or config.default_female_voice,
            "Loki": os.environ.get('LOKI_VOICE') or config.default_male_voice,
            "Reflection Pi": os.environ.get('PI_VOICE') or config.default_female_voice,
            "Elon Musk": os.environ.get('ELON_VOICE') or config.default_male_voice,
            "Bruce Wayne": os.environ.get('BRUCE_VOICE') or config.default_male_voice,
            "Steve Jobs": os.environ.get('JOBS_VOICE') or config.default_male_voice,
            "Sam Altman": os.environ.get('SAM_VOICE') or config.default_male_voice,
        }

    def get_voice_id(self, name):
        """Return the voice id registered for character ``name``.

        Unknown names (including the empty string) map to
        ``config.default_voice``.
        """
        return self.voice_ids.get(name, config.default_voice)

    async def stream(self, text, websocket, tts_event: asyncio.Event,
                     characater_name="", first_sentence=False) -> None:
        """Synthesize ``text`` and send the resulting audio to ``websocket``.

        Args:
            text: The text to convert to speech.
            websocket: Connection object; audio is delivered through its
                ``send_bytes`` coroutine.
            tts_event: When set, the remaining audio chunks are dropped.
            characater_name: Character whose voice should be used (the
                misspelling is part of the public signature and is kept for
                backward compatibility).
            first_sentence: When true, the request asks the API for its
                ``optimize_streaming_latency=4`` mode.

        Nothing is sent when ``DEBUG`` is true or when the API answers with a
        non-200 status.
        """
        if DEBUG:
            return

        payload = {"text": text, **config.data}
        url = config.url.format(voice_id=self.get_voice_id(characater_name))
        if first_sentence:
            url += '?optimize_streaming_latency=4'

        async with httpx.AsyncClient() as client:
            response = await client.post(url, json=payload, headers=config.headers)

            # On failure the body is an error description, not audio; it must
            # not be forwarded to the client as if it were playable bytes.
            if response.status_code != 200:
                logger.error(
                    f"ElevenLabs returned HTTP {response.status_code}: "
                    f"{response.text[:200]}"
                )
                return

            async for chunk in response.aiter_bytes():
                # Pause before each send so other tasks get a chance to run
                # (presumably so an interruption can set tts_event first).
                await asyncio.sleep(0.1)
                if tts_event.is_set():
                    # Stop streaming audio once the event has been signalled.
                    break
                await websocket.send_bytes(chunk)
|
|