ciyidogan committed on
Commit
299226b
·
verified ·
1 Parent(s): 47cc12e

Create realtime_stt_manager.py

Browse files
Files changed (1) hide show
  1. realtime_stt_manager.py +99 -0
realtime_stt_manager.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Real-time STT Manager for streaming transcription
3
+ """
4
+ from typing import AsyncIterator, Optional, Dict, Any
5
+ import asyncio
6
+ from datetime import datetime
7
+ import sys
8
+
9
+ from stt_interface import STTInterface, STTConfig, TranscriptionResult
10
+ from config_provider import ConfigProvider
11
+ from utils import log
12
+
13
class STTStreamManager:
    """Manages real-time STT streaming.

    Wraps an ``STTInterface`` provider: opens a streaming session, forwards
    audio chunks to it, relays the transcription results it produces and
    remembers the most recent *final* result.

    Note: ``accumulated_text`` is replaced (not appended to) every time a
    final result arrives, so it always mirrors the text of the latest final
    result.
    """

    def __init__(self):
        """Create an idle manager; call ``initialize`` before streaming."""
        # Provider that performs the actual recognition (set by initialize()).
        self.stt_provider: Optional[STTInterface] = None
        # True between a successful initialize() and stop_streaming().
        self.is_streaming = False
        # STTConfig built in initialize() from the caller's dict.
        self.config = None
        # Text of the most recent final result.
        self.accumulated_text = ""
        # The most recent final TranscriptionResult, if any.
        self.last_final_result = None

    def _remember_final(self, result: TranscriptionResult) -> None:
        """Store ``result`` as the latest final transcription."""
        self.accumulated_text = result.text
        self.last_final_result = result

    async def initialize(self, stt_provider: STTInterface, config: Dict[str, Any]):
        """Initialize STT stream manager and start a streaming session.

        Args:
            stt_provider: Provider used for all subsequent streaming calls.
            config: Plain settings dict. Missing keys fall back to the
                defaults spelled out below (e.g. ``language="tr-TR"``,
                ``sample_rate=16000``, ``encoding="WEBM_OPUS"``).

        Any exception raised by the provider's ``start_streaming`` propagates
        to the caller; ``is_streaming`` is only set after it succeeds.
        """
        self.stt_provider = stt_provider
        self.config = STTConfig(
            language=config.get("language", "tr-TR"),
            sample_rate=config.get("sample_rate", 16000),
            encoding=config.get("encoding", "WEBM_OPUS"),
            enable_punctuation=config.get("enable_punctuation", True),
            interim_results=config.get("interim_results", True),
            single_utterance=False,  # Important for continuous listening
            speech_timeout_ms=config.get("speech_timeout_ms", 2000),
            vad_enabled=config.get("vad_enabled", True),
            noise_reduction_enabled=config.get("noise_reduction_enabled", True),
            noise_reduction_level=config.get("noise_reduction_level", 2),
        )

        # Start streaming session
        await self.stt_provider.start_streaming(self.config)
        self.is_streaming = True
        log("✅ STT stream manager initialized")

    async def process_chunk(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
        """Process audio chunk and yield transcription results.

        Args:
            audio_chunk: Raw audio bytes handed to the provider's
                ``stream_audio``.

        Yields:
            Every result produced by the provider for this chunk. Final
            results are also recorded on the manager. If the provider raises,
            the error is logged and a single empty, non-final result with
            ``is_interrupt=True`` is yielded instead of propagating.

        Nothing is yielded when the manager is not streaming or has no
        provider; a warning is logged in that case.
        """
        if not self.is_streaming or not self.stt_provider:
            log("⚠️ STT not streaming or provider not initialized")
            return

        try:
            # Stream audio to STT provider
            async for result in self.stt_provider.stream_audio(audio_chunk):
                # Keep track of the latest final result
                if result.is_final:
                    self._remember_final(result)

                yield result

        except Exception as e:
            log(f"❌ STT processing error: {e}")
            # Yield error result
            # NOTE(review): is_interrupt=True presumably signals consumers
            # that the stream was interrupted -- confirm against
            # TranscriptionResult's definition.
            yield TranscriptionResult(
                text="",
                is_final=False,
                confidence=0.0,
                timestamp=datetime.now().timestamp(),
                is_interrupt=True,
            )

    async def stop_streaming(self) -> Optional[TranscriptionResult]:
        """Stop streaming and get final result.

        Returns:
            Whatever the provider's ``stop_streaming`` returns, or ``None``
            when the manager was not streaming, has no provider, or the
            provider raised while stopping (the error is logged).
        """
        if not self.is_streaming or not self.stt_provider:
            return None

        try:
            # Flip the flag first so the manager counts as stopped even if
            # the provider call below fails.
            self.is_streaming = False
            final_result = await self.stt_provider.stop_streaming()

            if final_result:
                self._remember_final(final_result)

            log("✅ STT streaming stopped")
            return final_result

        except Exception as e:
            log(f"❌ Error stopping STT stream: {e}")
            return None

    def reset(self):
        """Reset the stored text and last final result.

        The provider, config and ``is_streaming`` flag are left untouched.
        """
        self.accumulated_text = ""
        self.last_final_result = None
        log("🔄 STT stream manager reset")

    def get_accumulated_text(self) -> str:
        """Return the stored transcript text.

        This is the text of the most recent final result (or ``""`` after
        ``reset`` / before any final result), since each final result
        replaces the previous value.
        """
        return self.accumulated_text