Spaces:

ales
/

ai-audio-books

Running

Aliaksandr

merge dev into main (#13)

f655f69 unverified 8 months ago

2.98 kB

	import typing as t
	from copy import deepcopy

	from dotenv import load_dotenv
	from elevenlabs import VoiceSettings
	from elevenlabs.client import AsyncElevenLabs

	load_dotenv()

	from src.config import ELEVENLABS_API_KEY, logger
	from src.schemas import SoundEffectsParams, TTSParams, TTSTimestampsResponse
	from src.utils import auto_retry

	ELEVEN_CLIENT_ASYNC = AsyncElevenLabs(api_key=ELEVENLABS_API_KEY)


	async def tts_astream(
	voice_id: str, text: str, params: dict \| None = None
	) -> t.AsyncIterator[bytes]:
	params_all = dict(voice_id=voice_id, text=text)

	if params is not None:
	params_all["voice_settings"] = VoiceSettings( # type: ignore
	stability=params.get("stability"),
	similarity_boost=params.get("similarity_boost"),
	style=params.get("style"),
	)

	logger.info(
	f"request to 11labs TTS endpoint with params {params_all} "
	f'for the following text: "{text}"'
	)
	async_iter = ELEVEN_CLIENT_ASYNC.text_to_speech.convert(**params_all)
	async for chunk in async_iter:
	if chunk:
	yield chunk


	@auto_retry
	async def tts_astream_consumed(voice_id: str, text: str, params: dict \| None = None) -> list[bytes]:
	aiterator = tts_astream(voice_id=voice_id, text=text, params=params)
	return [x async for x in aiterator]


	@auto_retry
	async def tts_w_timestamps(params: TTSParams) -> TTSTimestampsResponse:
	async def _tts_w_timestamps(params: TTSParams) -> TTSTimestampsResponse:
	# NOTE: we need to use special `to_dict()` method to ensure pydantic model is converted
	# to dict with proper aliases
	params_dict = params.to_dict()

	params_no_text = deepcopy(params_dict)
	text = params_no_text.pop('text')
	logger.info(
	f"request to 11labs TTS endpoint with params {params_no_text} "
	f'for the following text: "{text}"'
	)

	response_raw = await ELEVEN_CLIENT_ASYNC.text_to_speech.convert_with_timestamps(
	**params_dict
	)

	response_parsed = TTSTimestampsResponse.model_validate(response_raw)
	return response_parsed

	res = await _tts_w_timestamps(params=params)
	return res


	async def sound_generation_astream(params: SoundEffectsParams) -> t.AsyncIterator[bytes]:
	params_no_text = params.model_dump(exclude={"text"})
	logger.info(
	f"request to 11labs sound effect generation with params {params_no_text} "
	f'for the following text: "{params.text}"'
	)

	async_iter = ELEVEN_CLIENT_ASYNC.text_to_sound_effects.convert(
	text=params.text,
	duration_seconds=params.duration_seconds,
	prompt_influence=params.prompt_influence,
	)
	async for chunk in async_iter:
	if chunk:
	yield chunk


	@auto_retry
	async def sound_generation_consumed(params: SoundEffectsParams):
	aiterator = sound_generation_astream(params=params)
	return [x async for x in aiterator]