ciyidogan committed on
Commit
0da26ae
·
verified ·
1 Parent(s): 1a93df8

Create stt_interface.py

Browse files
Files changed (1) hide show
  1. stt_interface.py +76 -0
stt_interface.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ STT (Speech-to-Text) Interface and Data Models
3
+ """
4
+
5
+ from abc import ABC, abstractmethod
6
+ from typing import Optional, Dict, Any, AsyncIterator, List
7
+ from dataclasses import dataclass
8
+ from enum import Enum
9
+ import json
10
+
11
class STTEngineType(Enum):
    """Names of the STT engines that can be selected.

    Each member's value is the lowercase string key for that engine, so a
    member can be looked up from its key with ``STTEngineType("google")``.
    """

    # NOTE(review): presumably means "speech-to-text disabled" -- confirm.
    NO_STT = "no_stt"
    GOOGLE = "google"
    AZURE = "azure"
    AMAZON = "amazon"
    FLICKER = "flicker"
17
+
18
@dataclass
class STTConfig:
    """STT configuration parameters.

    Every field has a default, so ``STTConfig()`` is a complete
    configuration. This class only stores the values; how a provider
    interprets each one is up to that provider.
    """

    language: str = "tr-TR"
    sample_rate: int = 16000  # presumably Hz -- confirm against the providers
    encoding: str = "WEBM_OPUS"
    enable_punctuation: bool = True
    enable_word_timestamps: bool = False
    model: str = "latest_long"
    use_enhanced: bool = True
    single_utterance: bool = False
    interim_results: bool = True

    # Voice Activity Detection
    vad_enabled: bool = True
    speech_timeout_ms: int = 2000

    # Noise reduction
    noise_reduction_enabled: bool = True
    noise_reduction_level: int = 2
38
+
39
@dataclass
class TranscriptionResult:
    """Result from STT engine.

    ``text``, ``is_final``, ``confidence`` and ``timestamp`` are required.
    ``word_timestamps`` and ``language`` default to ``None`` and
    ``is_interrupt`` defaults to ``False``.
    """

    text: str
    is_final: bool
    confidence: float
    timestamp: float
    # NOTE(review): the layout of each per-word dict is not defined here;
    # check the producing provider before relying on specific keys.
    word_timestamps: Optional[List[Dict]] = None
    language: Optional[str] = None
    is_interrupt: bool = False
49
+
50
class STTInterface(ABC):
    """Abstract base class for STT providers.

    A concrete provider must override every method below; the class cannot
    be instantiated until all of them are implemented.
    """

    @abstractmethod
    async def start_streaming(self, config: STTConfig) -> None:
        """Start streaming session.

        Args:
            config: Configuration to use for the session.
        """

    @abstractmethod
    async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
        """Stream audio chunk and get transcription results.

        Declared as an async generator, so implementations are consumed
        with ``async for result in provider.stream_audio(chunk)``.

        Args:
            audio_chunk: Raw audio bytes for this chunk.

        Yields:
            Transcription results produced for the audio received so far.
        """
        # The unreachable ``yield`` makes this an async generator function
        # rather than a coroutine function, so the abstract signature has
        # the same shape as overrides that ``yield`` results. The base
        # implementation itself produces nothing.
        if False:
            yield

    @abstractmethod
    async def stop_streaming(self) -> Optional[TranscriptionResult]:
        """Stop streaming and get final result.

        Returns:
            The final transcription result, or ``None`` if there is none.
        """

    @abstractmethod
    def supports_realtime(self) -> bool:
        """Check if provider supports real-time streaming."""

    @abstractmethod
    def get_supported_languages(self) -> List[str]:
        """Get list of supported language codes."""