ciyidogan committed on
Commit
6aeaf3c
·
verified ·
1 Parent(s): d171a47

Update stt/stt_interface.py

Browse files
Files changed (1) hide show
  1. stt/stt_interface.py +64 -79
stt/stt_interface.py CHANGED
@@ -1,80 +1,65 @@
1
- """
2
- STT (Speech-to-Text) Interface for Flare
3
- """
4
- from abc import ABC, abstractmethod
5
- from typing import Optional, Dict, Any, AsyncIterator, List
6
- from dataclasses import dataclass
7
- from enum import Enum
8
- import json
9
-
10
- class STTEngineType(Enum):
11
- NO_STT = "no_stt"
12
- GOOGLE = "google"
13
- AZURE = "azure"
14
- AMAZON = "amazon"
15
- FLICKER = "flicker"
16
-
17
- @dataclass
18
- class STTConfig:
19
- """STT configuration parameters"""
20
- language: str = "tr-TR"
21
- sample_rate: int = 16000
22
- encoding: str = "WEBM_OPUS"
23
- enable_punctuation: bool = True
24
- enable_word_timestamps: bool = False
25
- model: str = "latest_long"
26
- use_enhanced: bool = True
27
- single_utterance: bool = False
28
- interim_results: bool = True
29
-
30
- # Voice Activity Detection
31
- vad_enabled: bool = True
32
- speech_timeout_ms: int = 2000
33
-
34
- # Noise reduction
35
- noise_reduction_enabled: bool = True
36
- noise_reduction_level: int = 2
37
-
38
- @dataclass
39
- class TranscriptionResult:
40
- """Result from STT engine"""
41
- text: str
42
- is_final: bool
43
- confidence: float
44
- timestamp: float
45
- word_timestamps: Optional[List[Dict]] = None
46
- language: Optional[str] = None
47
- is_interrupt: bool = False
48
-
49
- class STTInterface(ABC):
50
- """Abstract base class for STT providers"""
51
-
52
- @abstractmethod
53
- async def start_streaming(self, config: STTConfig) -> None:
54
- """Start streaming session"""
55
- pass
56
-
57
- @abstractmethod
58
- async def stream_audio(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
59
- """Stream audio chunk and get transcription results"""
60
- pass
61
-
62
- @abstractmethod
63
- async def stop_streaming(self) -> Optional[TranscriptionResult]:
64
- """Stop streaming and get final result"""
65
- pass
66
-
67
- @abstractmethod
68
- def supports_realtime(self) -> bool:
69
- """Check if provider supports real-time streaming"""
70
- pass
71
-
72
- @abstractmethod
73
- def get_supported_languages(self) -> List[str]:
74
- """Get list of supported language codes"""
75
- pass
76
-
77
- @abstractmethod
78
- def get_provider_name(self) -> str:
79
- """Get provider name for logging"""
80
  pass
 
1
+ """
2
+ STT (Speech-to-Text) Interface for Flare - Simple Batch Mode
3
+ """
4
+ from abc import ABC, abstractmethod
5
+ from typing import Optional, List
6
+ from dataclasses import dataclass
7
+ from enum import Enum
8
+
9
+
10
+ class STTEngineType(Enum):
11
+ NO_STT = "no_stt"
12
+ GOOGLE = "google"
13
+ AZURE = "azure"
14
+ AMAZON = "amazon"
15
+ DEEPGRAM = "deepgram"
16
+
17
+
18
+ @dataclass
19
+ class STTConfig:
20
+ """STT configuration parameters"""
21
+ language: str = "tr-TR"
22
+ sample_rate: int = 16000
23
+ encoding: str = "LINEAR16"
24
+ enable_punctuation: bool = True
25
+ enable_word_timestamps: bool = False
26
+ model: str = "latest_long"
27
+ use_enhanced: bool = True
28
+
29
+
30
+ @dataclass
31
+ class TranscriptionResult:
32
+ """Result from STT engine"""
33
+ text: str
34
+ confidence: float
35
+ timestamp: float
36
+ language: Optional[str] = None
37
+ word_timestamps: Optional[List[dict]] = None
38
+
39
+
40
+ class STTInterface(ABC):
41
+ """Abstract base class for STT providers - Simple batch mode"""
42
+
43
+ @abstractmethod
44
+ async def transcribe(self, audio_data: bytes, config: STTConfig) -> Optional[TranscriptionResult]:
45
+ """
46
+ Transcribe audio data
47
+
48
+ Args:
49
+ audio_data: Raw PCM audio data (LINEAR16 format)
50
+ config: STT configuration
51
+
52
+ Returns:
53
+ TranscriptionResult or None if no speech detected
54
+ """
55
+ pass
56
+
57
+ @abstractmethod
58
+ def get_supported_languages(self) -> List[str]:
59
+ """Get list of supported language codes"""
60
+ pass
61
+
62
+ @abstractmethod
63
+ def get_provider_name(self) -> str:
64
+ """Get provider name for logging"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
  pass