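# NOTE(review): the text "Spaces: Building" preceded this module in the
# captured source; it appears to be a hosting-page status banner rather than
# part of the code, so it is kept here only as a comment.
"""
STT (Speech-to-Text) Interface for Flare - Simple Batch Mode
"""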
from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum
from typing import Optional, List
class STTEngineType(Enum):
    """Identifiers for the speech-to-text engines known to this module.

    Each member's value is the lowercase string key for that engine, so a
    member can be looked up from its key with ``STTEngineType("google")``.
    """

    # NOTE(review): presumably selects "no STT engine configured" - confirm
    # against the code that consumes this enum.
    NO_STT = "no_stt"
    GOOGLE = "google"
    AZURE = "azure"
    AMAZON = "amazon"
    DEEPGRAM = "deepgram"
@dataclass
class STTConfig:
    """STT configuration parameters.

    Declared as a dataclass so that every field keeps its default when
    omitted and can be overridden per instance via keyword arguments,
    e.g. ``STTConfig(language="en-US")``. ``STTConfig()`` with no arguments
    still yields the defaults listed below.

    Note: as a dataclass, instances compare by field values and are
    therefore not hashable.

    Attributes:
        language: Language code for recognition (default ``"tr-TR"``).
        sample_rate: Audio sample rate (default ``16000``; presumably Hz -
            confirm against the provider implementations).
        encoding: Audio encoding name (default ``"LINEAR16"``).
        enable_punctuation: Punctuation flag (default ``True``).
        enable_word_timestamps: Word-timestamp flag (default ``False``).
        model: Recognition model name (default ``"latest_long"``;
            presumably provider-specific - confirm).
        use_enhanced: Enhanced-model flag (default ``True``; presumably
            provider-specific - confirm).
    """

    language: str = "tr-TR"
    sample_rate: int = 16000
    encoding: str = "LINEAR16"
    enable_punctuation: bool = True
    enable_word_timestamps: bool = False
    model: str = "latest_long"
    use_enhanced: bool = True
@dataclass
class TranscriptionResult:
    """Result from an STT engine.

    Declared as a dataclass so instances can actually be built from their
    fields; ``text``, ``confidence`` and ``timestamp`` are required, the
    remaining fields default to ``None``.

    Attributes:
        text: The transcribed text.
        confidence: Confidence score reported for the transcription
            (range is provider-defined - TODO confirm).
        timestamp: Timestamp associated with the result (presumably epoch
            seconds - confirm against the provider implementations).
        language: Language of the result, or ``None`` when not provided.
        word_timestamps: Optional list of per-word timing dicts, or
            ``None`` when not provided (dict schema is not defined here).
    """

    text: str
    confidence: float
    timestamp: float
    language: Optional[str] = None
    word_timestamps: Optional[List[dict]] = None
class STTInterface(ABC):
    """Abstract base class for STT providers - simple batch mode.

    Every method is marked ``@abstractmethod`` so that a provider which
    forgets to implement one fails at instantiation time instead of
    silently returning ``None`` from the base-class stub.
    """

    @abstractmethod
    async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
        """Transcribe audio data.

        Args:
            audio_data: Raw PCM audio data (LINEAR16 format).
            config: STT configuration.

        Returns:
            TranscriptionResult, or None if no speech was detected.
        """

    @abstractmethod
    def get_supported_languages(self) -> List[str]:
        """Get the list of supported language codes."""

    @abstractmethod
    def get_provider_name(self) -> str:
        """Get the provider name for logging."""