Spaces:
Sleeping
Sleeping
File size: 2,447 Bytes
f655f69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
import json
import openai
from elevenlabs import VoiceSettings
from src.config import (
DEFAULT_TTS_SIMILARITY_BOOST,
DEFAULT_TTS_STABILITY,
DEFAULT_TTS_STABILITY_ACCEPTABLE_RANGE,
DEFAULT_TTS_STYLE,
OPENAI_API_KEY,
logger,
)
from src.prompts import EMOTION_STABILITY_MODIFICATION
from src.schemas import TTSParams
from src.utils import GPTModels, auto_retry
class TTSParamProcessor:
    """Derive ``TTSParams`` for a piece of text with the help of an OpenAI model.

    The text is sent to a chat completion together with the
    ``EMOTION_STABILITY_MODIFICATION`` system prompt; the JSON reply is parsed
    and its ``stability`` value (the only key read here) is turned into
    ElevenLabs ``VoiceSettings``.
    """

    # TODO: refactor to langchain function (?)
    def __init__(self):
        """Create the async OpenAI client used by :meth:`run`."""
        self.client = openai.AsyncOpenAI(api_key=OPENAI_API_KEY)

    @staticmethod
    def _wrap_results(data: dict, default_text: str) -> TTSParams:
        """Wrap the parsed LLM output into a ``TTSParams`` object.

        Args:
            data: Parsed JSON object returned by the model. Only the
                ``'stability'`` key is consulted.
            default_text: Text stored in the resulting ``TTSParams``.

        Returns:
            ``TTSParams`` with an empty ``voice_id``, the given text and voice
            settings whose stability is clamped to
            ``DEFAULT_TTS_STABILITY_ACCEPTABLE_RANGE``.
        """
        lower_bound = DEFAULT_TTS_STABILITY_ACCEPTABLE_RANGE[0]
        upper_bound = DEFAULT_TTS_STABILITY_ACCEPTABLE_RANGE[1]

        raw_stability = data.get('stability', DEFAULT_TTS_STABILITY)
        # The value comes from an LLM, so it may be null, a string, a bool or
        # NaN (json.loads accepts the NaN literal). A missing key already
        # falls back to the default; unusable values are treated the same way
        # instead of crashing in max()/min().
        try:
            stability = float(raw_stability)
        except (TypeError, ValueError, OverflowError):
            stability = None
        is_unusable = (
            stability is None
            or isinstance(raw_stability, bool)
            or stability != stability  # NaN is the only value unequal to itself
        )
        if is_unusable:
            logger.warning(
                f"Invalid stability value in LLM output: {raw_stability!r}; "
                f"falling back to default"
            )
            stability = DEFAULT_TTS_STABILITY

        # Clamp into the acceptable range.
        stability = max(stability, lower_bound)
        stability = min(stability, upper_bound)

        similarity_boost = DEFAULT_TTS_SIMILARITY_BOOST
        style = DEFAULT_TTS_STYLE

        params = TTSParams(
            # NOTE: voice will be set later in the builder pipeline
            voice_id='',
            text=default_text,
            # reference: https://elevenlabs.io/docs/speech-synthesis/voice-settings
            voice_settings=VoiceSettings(
                stability=stability,
                similarity_boost=similarity_boost,
                style=style,
                use_speaker_boost=False,
            ),
        )
        return params

    @auto_retry
    async def run(self, text: str) -> TTSParams:
        """Ask the model for TTS parameters for ``text`` and wrap them.

        Args:
            text: Input text; surrounding whitespace is stripped before it is
                sent to the model and stored in the result.

        Returns:
            The ``TTSParams`` built by :meth:`_wrap_results`.

        Raises:
            ValueError: If the completion's message content is ``None``.
            json.JSONDecodeError: If the model output is not valid JSON.

        NOTE(review): ``auto_retry`` is defined elsewhere; presumably it
        re-invokes this coroutine on failure - confirm against ``src.utils``.
        """
        text_prepared = text.strip()

        completion = await self.client.chat.completions.create(
            model=GPTModels.GPT_4o,
            messages=[
                {"role": "system", "content": EMOTION_STABILITY_MODIFICATION},
                {"role": "user", "content": text_prepared},
            ],
            response_format={"type": "json_object"},
        )

        chatgpt_output = completion.choices[0].message.content
        if chatgpt_output is None:
            raise ValueError('received None as openai response content')

        # Keep the try body limited to the call that can actually fail.
        try:
            output_dict = json.loads(chatgpt_output)
        except json.JSONDecodeError:
            logger.exception(f"Error in parsing LLM output: '{chatgpt_output}'")
            raise
        logger.info(f"TTS text processing succeeded: {output_dict}")

        output_wrapped = self._wrap_results(output_dict, default_text=text_prepared)
        return output_wrapped
|