Spaces:
Building
Building
File size: 1,615 Bytes
6aeaf3c c1b9e28 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
"""
STT (Speech-to-Text) Interface for Flare - Simple Batch Mode
"""
from abc import ABC, abstractmethod
from typing import Optional, List
from dataclasses import dataclass
from enum import Enum
class STTEngineType(Enum):
    """Identifiers for the selectable STT engines.

    Each member's value is the lowercase string form of its name, so a
    member can be looked up from that string, e.g. ``STTEngineType("google")``.
    """

    # Presumably selects "no speech-to-text engine"; confirm against callers.
    NO_STT = "no_stt"
    GOOGLE = "google"
    AZURE = "azure"
    AMAZON = "amazon"
    DEEPGRAM = "deepgram"
@dataclass
class STTConfig:
    """STT configuration parameters.

    Every field has a default, so ``STTConfig()`` is a valid configuration.
    How each field is interpreted is up to the concrete provider; notes
    marked "presumably" below are assumptions to confirm against the
    provider implementations.
    """

    # Language tag for recognition; the default "tr-TR" is Turkish (Turkey).
    language: str = "tr-TR"
    # Audio sample rate -- presumably in Hz; confirm against the providers.
    sample_rate: int = 16000
    # Name of the audio encoding of the data handed to the engine.
    encoding: str = "LINEAR16"
    # Presumably asks the engine to insert punctuation into the transcript.
    enable_punctuation: bool = True
    # Presumably asks the engine to return per-word timing information.
    enable_word_timestamps: bool = False
    # Recognition model name; presumably provider-specific -- TODO confirm.
    model: str = "latest_long"
    # Presumably opts into a provider's enhanced model variant -- TODO confirm.
    use_enhanced: bool = True
@dataclass
class TranscriptionResult:
    """Result from STT engine.

    ``text``, ``confidence`` and ``timestamp`` are required; ``language``
    and ``word_timestamps`` default to ``None``.
    """

    # The transcribed text.
    text: str
    # Engine confidence score -- presumably in the range 0.0-1.0; confirm.
    confidence: float
    # Time associated with the result -- units and epoch are not visible
    # here; verify against the provider implementations.
    timestamp: float
    # Language of the transcript, when reported.
    language: Optional[str] = None
    # Per-word timing entries, when available; the dict schema is
    # provider-defined and not visible here.
    word_timestamps: Optional[List[dict]] = None
class STTInterface(ABC):
    """Abstract base class for STT providers - Simple batch mode.

    Concrete providers must implement every abstract method below; the
    class cannot be instantiated until they do.
    """

    @abstractmethod
    async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
        """Transcribe audio data.

        Args:
            audio_data: Raw PCM audio data (LINEAR16 format)
            config: STT configuration

        Returns:
            TranscriptionResult or None if no speech detected
        """

    @abstractmethod
    def get_supported_languages(self) -> List[str]:
        """Get list of supported language codes."""

    @abstractmethod
    def get_provider_name(self) -> str:
        """Get provider name for logging."""