# jarvis/utils/tts.py
# megamined's picture
# Initial commit
# b36a86c
from os import getenv
import requests
from utils.functions import play, stream, save
# URL templates for the ElevenLabs text-to-speech API; fill in ``voice_id``
# with ``str.format`` before use.
ELEVENLABS_ENDPOINT = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
ELEVENLABS_STREAM_ENDPOINT = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}/stream?optimize_streaming_latency=3"

# Number of bytes requested per chunk when iterating a streamed response.
CHUNK_SIZE = 1024

# API key read from the environment; ``None`` when the variable is not set.
ELEVEN_API_KEY = getenv("ELEVEN_API_KEY")

# Display name -> ElevenLabs voice ID.
voices = {
    "Bella": "EXAVITQu4vr4xnSDxMaL",
    "Dorothy": "ThT5KcBeYPX3keUQqHPh",
    "Male": "onwK4e9ZLuTAKqWW03F9",
    "Chimamanda": "QSKN4kAq766BnZ0ilL0L",
    "Ruth": "o9iLaGDMP3YCJcZevdfB",
    "Ifeanyi": "iQe5hWADpVlprlflH1k8",
}
class TTS:
    """Text-to-speech client for the ElevenLabs HTTP API, bound to one voice."""

    def __init__(self, voice_id):
        """Remember the voice and prepare the request headers.

        Args:
            voice_id: ElevenLabs voice identifier, e.g. one of the values in
                the module-level ``voices`` mapping.
        """
        self.voice_id = voice_id
        self.headers = {
            "Accept": "audio/mpeg",
            "Content-Type": "application/json",
            "xi-api-key": ELEVEN_API_KEY,
        }

    def generate(self, text, stream_: bool = False, model="eleven_monolingual_v1"):
        """Synthesize ``text`` to audio using the configured voice.

        Args:
            text: The text to convert to speech.
            stream_: When true, POST to the streaming endpoint and return the
                audio lazily; otherwise POST to the regular endpoint and
                return the whole body at once.
            model: Value sent as ``model_id`` in the request payload.

        Returns:
            If ``stream_`` is true, a generator yielding the non-empty byte
            chunks of the response (requested ``CHUNK_SIZE`` bytes at a time).
            Otherwise the complete response body as ``bytes``.

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status.
        """
        data = {
            "text": text,
            "model_id": model,
            "voice_settings": {"stability": 0.5, "similarity_boost": 0.0},
        }

        # The non-streaming branch must use the plain endpoint; previously
        # both branches formatted the streaming URL.
        endpoint = ELEVENLABS_STREAM_ENDPOINT if stream_ else ELEVENLABS_ENDPOINT
        url = endpoint.format(voice_id=self.voice_id)

        response = requests.post(
            url,
            json=data,
            headers=self.headers,
            stream=stream_,
        )
        # Fail loudly instead of handing an error payload back as "audio".
        response.raise_for_status()

        if stream_:
            # Lazy generator: chunks are pulled from the connection only as
            # the caller iterates; empty chunks are skipped.
            return (
                chunk
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE)
                if chunk
            )
        return response.content