Spaces:

megamined
/

jarvis

Paused

File size: 1,680 Bytes

b36a86c

from os import getenv

import requests

from utils.functions import play, stream, save

# ElevenLabs API key, read from the environment once at import time
# (None when the variable is not set).
ELEVEN_API_KEY = getenv("ELEVEN_API_KEY")

# Chunk size, in bytes, used when iterating over a streamed HTTP response.
CHUNK_SIZE = 1024

# URL template for the text-to-speech route; fill with .format(voice_id=...).
ELEVENLABS_ENDPOINT = "https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"

# URL template for the streaming route: the same path plus "/stream" and the
# optimize_streaming_latency query parameter set to 3.
ELEVENLABS_STREAM_ENDPOINT = (
    ELEVENLABS_ENDPOINT + "/stream?optimize_streaming_latency=3"
)


# Display name -> ElevenLabs voice id. Insertion order is preserved, so any
# caller that lists the names sees them in this order.
voices = dict(
    Bella="EXAVITQu4vr4xnSDxMaL",
    Dorothy="ThT5KcBeYPX3keUQqHPh",
    Male="onwK4e9ZLuTAKqWW03F9",
    Chimamanda="QSKN4kAq766BnZ0ilL0L",
    Ruth="o9iLaGDMP3YCJcZevdfB",
    Ifeanyi="iQe5hWADpVlprlflH1k8",
)


class TTS:
    """Small client for the ElevenLabs text-to-speech HTTP API.

    An instance is bound to a single voice and holds the request headers
    (including the API key taken from ``ELEVEN_API_KEY``) that are sent with
    every synthesis request.
    """

    def __init__(self, voice_id: str):
        """Bind the client to a voice.

        Args:
            voice_id: Voice identifier substituted into the endpoint URL,
                for example one of the values of the ``voices`` mapping.
        """
        self.voice_id = voice_id
        self.headers = {
            "Accept": "audio/mpeg",
            "Content-Type": "application/json",
            "xi-api-key": ELEVEN_API_KEY,
        }

    def generate(
        self,
        text: str,
        stream_: bool = False,
        model: str = "eleven_monolingual_v1",
    ):
        """Send ``text`` to the API and return the response body.

        Args:
            text: Text to be spoken.
            stream_: When true, POST to the streaming endpoint and return a
                lazy generator of chunks; when false, POST to the regular
                endpoint and return the complete body.
            model: Value sent as ``model_id`` in the request payload.

        Returns:
            If ``stream_`` is true, a generator yielding non-empty ``bytes``
            chunks of at most ``CHUNK_SIZE`` bytes; otherwise the whole
            response body as ``bytes``.

        Raises:
            requests.RequestException: Propagated from ``requests.post`` on
                connection-level failures.
        """
        payload = {
            "text": text,
            "model_id": model,
            "voice_settings": {"stability": 0.5, "similarity_boost": 0.0},
        }

        # Pick the template that matches the requested mode. Both branches
        # used to format the streaming template, so one-shot requests were
        # sent to ".../stream?optimize_streaming_latency=3" as well.
        template = ELEVENLABS_STREAM_ENDPOINT if stream_ else ELEVENLABS_ENDPOINT
        url = template.format(voice_id=self.voice_id)

        response = requests.post(
            url,
            json=payload,
            headers=self.headers,
            stream=stream_,
        )
        # NOTE(review): the HTTP status is not checked, so an error response
        # body is handed back to the caller as if it were audio. Consider
        # response.raise_for_status() once callers are ready to handle it.

        if stream_:
            # Lazily forward the body, dropping empty chunks.
            return (
                chunk
                for chunk in response.iter_content(chunk_size=CHUNK_SIZE)
                if chunk
            )
        return response.content