Spaces:
Running
Running
Create realtime_stt_manager.py
Browse files- realtime_stt_manager.py +99 -0
realtime_stt_manager.py
ADDED
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
Real-time STT Manager for streaming transcription
|
3 |
+
"""
|
4 |
+
from typing import AsyncIterator, Optional, Dict, Any
|
5 |
+
import asyncio
|
6 |
+
from datetime import datetime
|
7 |
+
import sys
|
8 |
+
|
9 |
+
from stt_interface import STTInterface, STTConfig, TranscriptionResult
|
10 |
+
from config_provider import ConfigProvider
|
11 |
+
from utils import log
|
12 |
+
|
13 |
+
class STTStreamManager:
    """Manage a single real-time (streaming) speech-to-text session.

    The manager wraps an ``STTInterface`` provider: ``initialize`` builds an
    ``STTConfig`` and starts the provider's streaming session,
    ``process_chunk`` forwards audio and relays transcription results, and
    ``stop_streaming`` ends the session and returns the provider's final
    result. The text of the most recent final result is kept on the instance.
    """

    def __init__(self):
        """Create an idle manager; ``initialize`` must be called before use."""
        # Provider that performs the transcription; set by initialize().
        self.stt_provider: Optional[STTInterface] = None
        # True only between a successful initialize() and stop_streaming().
        self.is_streaming: bool = False
        # STTConfig built by initialize() from the caller's dict.
        self.config: Optional[STTConfig] = None
        # Text of the most recent *final* result (overwritten, not appended).
        self.accumulated_text: str = ""
        # The most recent final TranscriptionResult, if any.
        self.last_final_result: Optional[TranscriptionResult] = None

    async def initialize(self, stt_provider: STTInterface, config: Dict[str, Any]) -> None:
        """Store the provider, build the STT config and start streaming.

        Args:
            stt_provider: Provider whose ``start_streaming``, ``stream_audio``
                and ``stop_streaming`` coroutines/generators are used.
            config: Optional overrides for the ``STTConfig`` fields read
                below; any key that is missing falls back to the default
                shown in the corresponding ``config.get`` call.

        Any exception raised by ``start_streaming`` propagates to the caller;
        in that case ``is_streaming`` is not set to True.
        """
        self.stt_provider = stt_provider
        self.config = STTConfig(
            language=config.get("language", "tr-TR"),
            sample_rate=config.get("sample_rate", 16000),
            encoding=config.get("encoding", "WEBM_OPUS"),
            enable_punctuation=config.get("enable_punctuation", True),
            interim_results=config.get("interim_results", True),
            single_utterance=False,  # Important for continuous listening
            speech_timeout_ms=config.get("speech_timeout_ms", 2000),
            vad_enabled=config.get("vad_enabled", True),
            noise_reduction_enabled=config.get("noise_reduction_enabled", True),
            noise_reduction_level=config.get("noise_reduction_level", 2),
        )

        # Start the streaming session; only mark the manager as streaming
        # once the provider has accepted the configuration.
        await self.stt_provider.start_streaming(self.config)
        self.is_streaming = True
        log("✅ STT stream manager initialized")

    async def process_chunk(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
        """Send one audio chunk to the provider and yield its results.

        Args:
            audio_chunk: Raw audio bytes passed unchanged to the provider's
                ``stream_audio``.

        Yields:
            Every ``TranscriptionResult`` produced by the provider for this
            chunk. If the provider raises, a single empty, non-final result
            with ``is_interrupt=True`` is yielded instead of propagating the
            exception. Nothing is yielded when the manager is not streaming.
        """
        if not self.is_streaming or not self.stt_provider:
            log("⚠️ STT not streaming or provider not initialized")
            return

        try:
            # Stream audio to the STT provider and relay each result.
            async for result in self.stt_provider.stream_audio(audio_chunk):
                # Remember the latest final result; interim results are
                # passed through without being stored.
                if result.is_final:
                    self.accumulated_text = result.text
                    self.last_final_result = result

                yield result

        except Exception as e:
            log(f"❌ STT processing error: {e}")
            # Report the failure to the consumer as an in-band error result
            # so the consuming loop keeps running.
            yield TranscriptionResult(
                text="",
                is_final=False,
                confidence=0.0,
                timestamp=datetime.now().timestamp(),
                is_interrupt=True,
            )

    async def stop_streaming(self) -> Optional[TranscriptionResult]:
        """Stop the streaming session and return the provider's final result.

        Returns:
            The value returned by the provider's ``stop_streaming`` (stored
            as the latest final result when truthy), or ``None`` when the
            manager was not streaming or the provider raised while stopping.
        """
        if not self.is_streaming or not self.stt_provider:
            return None

        try:
            # Clear the flag first so the manager is considered stopped even
            # if the provider call below fails.
            self.is_streaming = False
            final_result = await self.stt_provider.stop_streaming()

            if final_result:
                self.accumulated_text = final_result.text
                self.last_final_result = final_result

            log("✅ STT streaming stopped")
            return final_result

        except Exception as e:
            log(f"❌ Error stopping STT stream: {e}")
            return None

    def reset(self) -> None:
        """Clear the stored text and last final result.

        The provider, config and ``is_streaming`` flag are left untouched.
        """
        self.accumulated_text = ""
        self.last_final_result = None
        log("🔄 STT stream manager reset")

    def get_accumulated_text(self) -> str:
        """Return the stored text (that of the most recent final result)."""
        return self.accumulated_text
|