ai-audio-books / src /preprocess_tts_emotions_chain.py
Aliaksandr
merge dev into main (#13)
f655f69 unverified
import json
import openai
from elevenlabs import VoiceSettings
from src.config import (
DEFAULT_TTS_SIMILARITY_BOOST,
DEFAULT_TTS_STABILITY,
DEFAULT_TTS_STABILITY_ACCEPTABLE_RANGE,
DEFAULT_TTS_STYLE,
OPENAI_API_KEY,
logger,
)
from src.prompts import EMOTION_STABILITY_MODIFICATION
from src.schemas import TTSParams
from src.utils import GPTModels, auto_retry
class TTSParamProcessor:
    """Derive text-to-speech parameters for a text fragment via an OpenAI chat model.

    The model is prompted with ``EMOTION_STABILITY_MODIFICATION`` and asked to
    answer with a JSON object. Only the ``stability`` value of that object is
    used; it is clamped to ``DEFAULT_TTS_STABILITY_ACCEPTABLE_RANGE`` and
    wrapped, together with project defaults, into a ``TTSParams`` instance.
    """

    # TODO: refactor to langchain function (?)
    def __init__(self):
        """Create the async OpenAI client used by ``run``."""
        self.client = openai.AsyncOpenAI(api_key=OPENAI_API_KEY)

    @staticmethod
    def _wrap_results(data: dict, default_text: str) -> TTSParams:
        """Build ``TTSParams`` from the parsed model output.

        Args:
            data: Parsed JSON object returned by the model. Only the
                ``'stability'`` key is read.
            default_text: Text stored in the resulting ``TTSParams``.

        Returns:
            ``TTSParams`` with an empty ``voice_id``, ``default_text`` as the
            text, and voice settings made of the clamped stability plus the
            default similarity boost and style.
        """
        raw_stability = data.get('stability', DEFAULT_TTS_STABILITY)

        # The value comes from an LLM, so it may be a numeric string, null, or
        # something else entirely. Coerce it instead of letting max()/min()
        # fail on mixed types.
        try:
            stability = float(raw_stability)
        except (TypeError, ValueError, OverflowError):
            stability = None

        # NaN compares unequal to itself and would slip through the clamping
        # below unchanged, so treat it as invalid as well.
        if stability is None or stability != stability:
            logger.warning(
                f"Invalid stability value {raw_stability!r} in LLM output, "
                f"falling back to default {DEFAULT_TTS_STABILITY}"
            )
            stability = DEFAULT_TTS_STABILITY

        # Clamp into the acceptable range (lower bound first, then upper).
        stability = max(stability, DEFAULT_TTS_STABILITY_ACCEPTABLE_RANGE[0])
        stability = min(stability, DEFAULT_TTS_STABILITY_ACCEPTABLE_RANGE[1])

        similarity_boost = DEFAULT_TTS_SIMILARITY_BOOST
        style = DEFAULT_TTS_STYLE

        params = TTSParams(
            # NOTE: voice will be set later in the builder pipeline
            voice_id='',
            text=default_text,
            # reference: https://elevenlabs.io/docs/speech-synthesis/voice-settings
            voice_settings=VoiceSettings(
                stability=stability,
                similarity_boost=similarity_boost,
                style=style,
                use_speaker_boost=False,
            ),
        )
        return params

    # NOTE(review): auto_retry is defined in src.utils; presumably it re-runs
    # the coroutine when it raises -- confirm against its implementation.
    @auto_retry
    async def run(self, text: str) -> TTSParams:
        """Ask the model for TTS parameters for ``text`` and wrap them.

        Args:
            text: Text fragment to analyse. Leading and trailing whitespace is
                stripped before it is sent to the model and stored in the
                result.

        Returns:
            ``TTSParams`` built by ``_wrap_results`` from the model's JSON answer.

        Raises:
            ValueError: If the response content is ``None`` or the decoded
                JSON is not an object.
            json.JSONDecodeError: If the response content is not valid JSON.
        """
        text_prepared = text.strip()

        completion = await self.client.chat.completions.create(
            model=GPTModels.GPT_4o,
            messages=[
                {"role": "system", "content": EMOTION_STABILITY_MODIFICATION},
                {"role": "user", "content": text_prepared},
            ],
            response_format={"type": "json_object"},
        )
        chatgpt_output = completion.choices[0].message.content
        if chatgpt_output is None:
            raise ValueError('received None as openai response content')

        # Keep the try body limited to the call that can raise JSONDecodeError.
        try:
            output_dict = json.loads(chatgpt_output)
        except json.JSONDecodeError:
            logger.exception(f"Error in parsing LLM output: '{chatgpt_output}'")
            # Bare raise re-raises the active exception with its traceback intact.
            raise

        # _wrap_results calls .get() on the payload, so reject valid JSON that
        # is not an object (e.g. a list or a bare number) with a clear error.
        if not isinstance(output_dict, dict):
            raise ValueError(
                f"expected a JSON object from openai, got: '{chatgpt_output}'"
            )

        logger.info(f"TTS text processing succeeded: {output_dict}")

        output_wrapped = self._wrap_results(output_dict, default_text=text_prepared)
        return output_wrapped