""" STT (Speech-to-Text) Interface and Data Models """ from abc import ABC, abstractmethod from typing import Optional, Dict, Any, AsyncIterator, List from dataclasses import dataclass from enum import Enum import json class STTEngineType(Enum): NO_STT = "no_stt" GOOGLE = "google" AZURE = "azure" AMAZON = "amazon" FLICKER = "flicker" @dataclass class STTConfig: """STT configuration parameters""" language: str = "tr-TR" sample_rate: int = 16000 encoding: str = "WEBM_OPUS" enable_punctuation: bool = True enable_word_timestamps: bool = False model: str = "latest_long" use_enhanced: bool = True single_utterance: bool = False interim_results: bool = True # Voice Activity Detection vad_enabled: bool = True speech_timeout_ms: int = 2000 # Noise reduction noise_reduction_enabled: bool = True noise_reduction_level: int = 2 @dataclass class TranscriptionResult: """Result from STT engine""" text: str is_final: bool confidence: float timestamp: float word_timestamps: Optional[List[Dict]] = None language: Optional[str] = None is_interrupt: bool = False class STTInterface(ABC): """Abstract base class for STT providers""" @abstractmethod async def start_streaming(self, config: STTConfig) -> None: """Start streaming session""" pass @abstractmethod async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]: """Stream audio chunk and get transcription results""" pass @abstractmethod async def stop_streaming(self) -> Optional[TranscriptionResult]: """Stop streaming and get final result""" pass @abstractmethod def supports_realtime(self) -> bool: """Check if provider supports real-time streaming""" pass @abstractmethod def get_supported_languages(self) -> List[str]: """Get list of supported language codes""" pass