Spaces:

UcsTurkey
/

flare

Building

App Files Files Community

flare / tts_interface.py

ciyidogan

Update tts_interface.py

559bf23 verified 24 days ago

raw

history blame

5.68 kB

	"""
	TTS Interface and Implementations
	"""

	from abc import ABC, abstractmethod
	from typing import Optional, Dict, Any, Set
	import httpx
	import os
	from datetime import datetime
	import sys

	from tts_preprocessor import TTSPreprocessor

	def log(message: str):
	    """Print ``message`` to stdout behind a ``[HH:MM:SS.mmm]`` timestamp.

	    stdout is flushed right after printing so the line is emitted
	    immediately instead of waiting in the output buffer.
	    """
	    now = datetime.now()
	    # Whole milliseconds, zero-padded to three digits.
	    stamp = f"{now:%H:%M:%S}.{now.microsecond // 1000:03d}"
	    print(f"[{stamp}] {message}")
	    sys.stdout.flush()

	class TTSInterface(ABC):
	    """Contract shared by all text-to-speech providers.

	    Concrete providers must implement ``synthesize`` and
	    ``get_supported_voices``. Each instance also carries two pieces of
	    metadata that subclasses may replace after calling ``__init__``:
	    ``preprocessing_flags`` (empty by default) and ``supports_ssml``
	    (``False`` by default).
	    """

	    def __init__(self):
	        # Neutral defaults: no preprocessing requested, no SSML support.
	        self.preprocessing_flags: Set[str] = set()
	        self.supports_ssml: bool = False

	    @abstractmethod
	    async def synthesize(self, text: str, voice_id: Optional[str] = None, **kwargs) -> bytes:
	        """Turn ``text`` into speech and return the audio as bytes."""
	        ...

	    @abstractmethod
	    def get_supported_voices(self) -> Dict[str, str]:
	        """Return the voices this provider offers as a str-to-str mapping."""
	        ...

	    def get_preprocessing_flags(self) -> Set[str]:
	        """Return the preprocessing flags currently set on this provider."""
	        return self.preprocessing_flags

	    def supports_ssml_format(self) -> bool:
	        """Return whether this provider is marked as supporting SSML."""
	        return self.supports_ssml

	class ElevenLabsTTS(TTSInterface):
	    """TTS provider that calls the ElevenLabs text-to-speech HTTP API."""

	    # Fallbacks used by synthesize() when the caller does not pass a value.
	    DEFAULT_MODEL_ID = "eleven_multilingual_v2"
	    DEFAULT_OUTPUT_FORMAT = "mp3_44100_128"
	    # Per-request timeout handed to httpx, in seconds.
	    REQUEST_TIMEOUT_SECONDS = 30.0

	    def __init__(self, api_key: str):
	        """Store the API key and configure text preprocessing.

	        Args:
	            api_key: ElevenLabs API key; leading/trailing whitespace is
	                stripped before it is stored.
	        """
	        super().__init__()
	        self.api_key = api_key.strip()  # remove leading/trailing whitespace
	        self.base_url = "https://api.elevenlabs.io/v1"
	        self.default_voice_id = "2thYbn2sOGtiTwd9QwWH"  # Avencia

	        # Kinds of text that need preprocessing for ElevenLabs
	        self.preprocessing_flags = {
	            TTSPreprocessor.PREPROCESS_NUMBERS,     # large numbers
	            TTSPreprocessor.PREPROCESS_CURRENCY,    # currencies
	            TTSPreprocessor.PREPROCESS_TIME,        # time format
	            TTSPreprocessor.PREPROCESS_CODES,       # PNR codes
	            TTSPreprocessor.PREPROCESS_PERCENTAGE,  # percentages
	        }

	        # tr-TR -> tr conversion
	        self.preprocessor = TTSPreprocessor(language="tr")

	    async def synthesize(
	        self,
	        text: str,
	        voice_id: Optional[str] = None,
	        model_id: Optional[str] = None,
	        output_format: Optional[str] = None,
	        **kwargs
	    ) -> bytes:
	        """Convert text to speech using the ElevenLabs API.

	        Args:
	            text: Input text; it is run through ``self.preprocessor`` with
	                ``self.preprocessing_flags`` before being sent.
	            voice_id: Voice to use; falls back to ``self.default_voice_id``.
	            model_id: Model to use; falls back to ``DEFAULT_MODEL_ID``.
	            output_format: Value of the ``output_format`` query parameter;
	                falls back to ``DEFAULT_OUTPUT_FORMAT``.
	            **kwargs: Accepted for interface compatibility; not used.

	        Returns:
	            The raw response body returned by the API.

	        Raises:
	            httpx.HTTPStatusError: If the API answers with an error status
	                (logged with status code and body, then re-raised).
	            Exception: Any other failure is logged and re-raised unchanged.
	        """
	        processed_text = self.preprocessor.preprocess(text, self.preprocessing_flags)

	        # Use defaults if not provided
	        voice_id = voice_id or self.default_voice_id
	        model_id = model_id or self.DEFAULT_MODEL_ID
	        output_format = output_format or self.DEFAULT_OUTPUT_FORMAT

	        url = f"{self.base_url}/text-to-speech/{voice_id}"

	        # output_format is always set at this point, so pass it through
	        # httpx's params= and let the client build and percent-encode the
	        # query string instead of concatenating it onto the URL by hand.
	        params = {"output_format": output_format}

	        headers = {
	            "Accept": "audio/mpeg",
	            "Content-Type": "application/json",
	            "xi-api-key": self.api_key,
	        }

	        data = {
	            "text": processed_text,
	            "model_id": model_id,
	            "voice_settings": {
	                "stability": 0.5,
	                "similarity_boost": 0.75,
	                "style": 0.0,
	                "use_speaker_boost": True,
	            },
	        }

	        try:
	            async with httpx.AsyncClient() as client:
	                log(f"🎤 ElevenLabs TTS request: voice={voice_id}, model={model_id}")
	                log(f"📝 Text (first 100 chars): {processed_text[:100]}...")

	                response = await client.post(
	                    url,
	                    params=params,
	                    json=data,
	                    headers=headers,
	                    timeout=self.REQUEST_TIMEOUT_SECONDS,
	                )

	                # Turn 4xx/5xx answers into httpx.HTTPStatusError.
	                response.raise_for_status()
	                audio_data = response.content

	                log(f"✅ ElevenLabs TTS returned {len(audio_data)} bytes")
	                return audio_data

	        except httpx.HTTPStatusError as e:
	            log(f"❌ ElevenLabs API error: {e.response.status_code} - {e.response.text}")
	            raise
	        except Exception as e:
	            # Log for diagnostics only; the caller still sees the original error.
	            log(f"❌ TTS synthesis error: {e}")
	            raise

	    def get_supported_voices(self) -> Dict[str, str]:
	        """Return a fixed mapping of voice id to display name.

	        This is a hard-coded default set, not fetched from the API.
	        """
	        # NOTE(review): self.default_voice_id is not one of the ids below.
	        return {
	            "21m00Tcm4TlvDq8ikWAM": "Rachel (Female)",
	            "EXAVITQu4vr4xnSDxMaL": "Bella (Female)",
	            "ErXwobaYiN019PkySvjV": "Antoni (Male)",
	            "VR6AewLTigWG4xSOukaG": "Arnold (Male)",
	            "pNInz6obpgDQGcFmaJgB": "Adam (Male)",
	            "yoZ06aMxZJJ28mfd3POQ": "Sam (Male)",
	        }

	class BlazeTTS(TTSInterface):
	    """Stub provider reserved for a future Blaze TTS implementation.

	    It only stores the API key; synthesis is not available yet and no
	    voices are reported.
	    """

	    def __init__(self, api_key: str):
	        super().__init__()
	        self.api_key = api_key

	    async def synthesize(
	        self,
	        text: str,
	        voice_id: Optional[str] = None,
	        **kwargs
	    ) -> bytes:
	        """Always raise ``NotImplementedError``; Blaze synthesis does not exist yet."""
	        raise NotImplementedError("Blaze TTS not implemented yet")

	    def get_supported_voices(self) -> Dict[str, str]:
	        """Return an empty mapping, as no Blaze voices are defined."""
	        no_voices: Dict[str, str] = {}
	        return no_voices


	def create_tts_provider(engine: str, api_key: Optional[str] = None) -> Optional[TTSInterface]:
	    """Build the TTS provider that matches ``engine``.

	    Args:
	        engine: Engine name; ``"elevenlabs"``, ``"blaze"`` and ``"no_tts"``
	            are recognised.
	        api_key: Key handed to the provider; ``"elevenlabs"`` and
	            ``"blaze"`` are only created when it is truthy.

	    Returns:
	        A provider instance, or ``None`` when ``engine`` is ``"no_tts"``,
	        is not recognised, or needs an API key that was not supplied.
	        The latter two cases also log a warning.
	    """
	    # TTS explicitly switched off: nothing to build and nothing to warn about.
	    if engine == "no_tts":
	        return None

	    has_key = bool(api_key)
	    if has_key and engine == "elevenlabs":
	        return ElevenLabsTTS(api_key)
	    if has_key and engine == "blaze":
	        return BlazeTTS(api_key)

	    log(f"⚠️ Unknown or unconfigured TTS engine: {engine}")
	    return None