Spaces:
Building
Building
File size: 3,800 Bytes
299226b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
"""
Real-time STT Manager for streaming transcription
"""
from typing import AsyncIterator, Optional, Dict, Any
import asyncio
from datetime import datetime
import sys
from stt_interface import STTInterface, STTConfig, TranscriptionResult
from config_provider import ConfigProvider
from utils import log
class STTStreamManager:
    """Manage one real-time speech-to-text (STT) streaming session.

    The manager wraps an ``STTInterface`` provider: it builds the
    ``STTConfig`` for the session, forwards audio chunks to the provider,
    relays the transcription results, and remembers the most recent final
    result.

    NOTE(review): despite its name, ``accumulated_text`` is *overwritten* by
    every final result rather than appended to. Confirm whether the provider
    returns cumulative text in its final results; if it does not, earlier
    utterances are lost.
    """

    def __init__(self):
        # Provider that performs the actual recognition; set by initialize().
        self.stt_provider: Optional[STTInterface] = None
        # True between a successful initialize() and the next stop_streaming().
        self.is_streaming = False
        # STTConfig built by initialize(); None until then.
        self.config = None
        # Text of the most recent final result ("" if none yet / after reset()).
        self.accumulated_text = ""
        # Most recent final TranscriptionResult, or None.
        self.last_final_result = None

    async def initialize(self, stt_provider: STTInterface, config: Dict[str, Any]):
        """Store the provider, build the session config and start streaming.

        Args:
            stt_provider: Provider used for this streaming session.
            config: Optional overrides for the ``STTConfig`` fields read
                below. Missing keys fall back to the defaults shown in the
                code; ``None`` is treated like an empty mapping.

        Any exception raised by the provider's ``start_streaming`` propagates
        to the caller, in which case ``is_streaming`` stays ``False``.
        """
        # Tolerate a missing config instead of failing on ``None.get``.
        settings = config or {}

        self.stt_provider = stt_provider
        self.config = STTConfig(
            language=settings.get("language", "tr-TR"),
            sample_rate=settings.get("sample_rate", 16000),
            encoding=settings.get("encoding", "WEBM_OPUS"),
            enable_punctuation=settings.get("enable_punctuation", True),
            interim_results=settings.get("interim_results", True),
            single_utterance=False,  # Important for continuous listening
            speech_timeout_ms=settings.get("speech_timeout_ms", 2000),
            vad_enabled=settings.get("vad_enabled", True),
            noise_reduction_enabled=settings.get("noise_reduction_enabled", True),
            noise_reduction_level=settings.get("noise_reduction_level", 2),
        )

        # Start the streaming session; only flag it as active once the
        # provider call has returned without raising.
        await self.stt_provider.start_streaming(self.config)
        self.is_streaming = True
        log("✅ STT stream manager initialized")

    async def process_chunk(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
        """Send one audio chunk to the provider and yield its results.

        Args:
            audio_chunk: Raw audio bytes passed unchanged to the provider's
                ``stream_audio``.

        Yields:
            Every result produced by the provider for this chunk. Final
            results (``result.is_final``) are also recorded on the manager
            before being yielded. If the manager is not streaming, a warning
            is logged and nothing is yielded. If an exception is raised while
            streaming, it is logged and a single empty, non-final placeholder
            result with ``is_interrupt=True`` is yielded instead of
            propagating the error.
        """
        if not self.is_streaming or not self.stt_provider:
            log("⚠️ STT not streaming or provider not initialized")
            return

        try:
            # Stream audio to the STT provider and relay whatever it returns.
            async for result in self.stt_provider.stream_audio(audio_chunk):
                # Remember the latest final result (overwrites the previous one).
                if result.is_final:
                    self.accumulated_text = result.text
                    self.last_final_result = result
                yield result
        except Exception as e:
            log(f"❌ STT processing error: {e}")
            # Yield a placeholder so the consumer still receives a result;
            # how ``is_interrupt`` is interpreted is defined by the consumer.
            yield TranscriptionResult(
                text="",
                is_final=False,
                confidence=0.0,
                timestamp=datetime.now().timestamp(),
                is_interrupt=True,
            )

    async def stop_streaming(self) -> Optional[TranscriptionResult]:
        """Stop the streaming session and return the provider's final result.

        Returns:
            Whatever the provider's ``stop_streaming`` returns, or ``None``
            when the manager was not streaming or the provider call raised
            (the error is logged, not propagated).
        """
        if not self.is_streaming or not self.stt_provider:
            return None

        try:
            # Cleared before awaiting the provider, so the manager counts as
            # stopped even if the provider call fails.
            self.is_streaming = False
            final_result = await self.stt_provider.stop_streaming()
            if final_result:
                self.accumulated_text = final_result.text
                self.last_final_result = final_result
            log("✅ STT streaming stopped")
            return final_result
        except Exception as e:
            log(f"❌ Error stopping STT stream: {e}")
            return None

    def reset(self):
        """Clear the stored text and last final result.

        ``is_streaming``, the provider and the config are left untouched.
        """
        self.accumulated_text = ""
        self.last_final_result = None
        log("🔄 STT stream manager reset")

    def get_accumulated_text(self) -> str:
        """Return the stored transcript text.

        This is the text of the most recent final result, or ``""`` if there
        has been none since construction or the last ``reset()``.
        """
        return self.accumulated_text