flare / realtime_stt_manager.py
ciyidogan's picture
Update realtime_stt_manager.py
19f1bba verified
raw
history blame
4.15 kB
"""
Real-time STT Manager for streaming transcription
"""
from typing import AsyncIterator, Optional, Dict, Any
import asyncio
from datetime import datetime
import sys
from stt_interface import STTInterface, STTConfig, TranscriptionResult
from config_provider import ConfigProvider
from logger import log_info, log_error, log_warning, log_debug
class STTStreamManager:
    """Manage one real-time speech-to-text streaming session.

    The manager wraps an ``STTInterface`` provider: ``initialize`` opens the
    provider stream, ``process_chunk`` forwards audio and relays the
    transcription results, and ``stop_streaming`` closes the stream. The text
    of the most recent final result is kept in ``accumulated_text``.
    """

    def __init__(self):
        # Provider used for streaming; assigned by initialize().
        self.stt_provider: Optional["STTInterface"] = None
        # True between a successful initialize() and stop_streaming().
        self.is_streaming = False
        # STTConfig built by initialize(); None until then.
        self.config = None
        # Text of the latest final result (overwritten on every final result).
        self.accumulated_text = ""
        # Latest final TranscriptionResult, or None if there has been none.
        self.last_final_result = None

    async def initialize(self, stt_provider: "STTInterface", config: Dict[str, Any]):
        """Build the STT configuration and open a streaming session.

        Args:
            stt_provider: Provider whose ``start_streaming`` is awaited.
            config: Plain dict of settings; every key is optional and falls
                back to the default shown in the ``STTConfig(...)`` call.

        Raises:
            Whatever ``STTConfig`` or ``stt_provider.start_streaming`` raises.
            In that case ``is_streaming`` is left ``False``.
        """
        # Close a session left open by an earlier initialize(). Otherwise the
        # previous provider stream is never stopped, and a failure below would
        # leave is_streaming=True while pointing at a provider that never
        # started. stop_streaming() clears the flag and logs its own errors.
        if self.is_streaming:
            await self.stop_streaming()

        self.stt_provider = stt_provider

        # Create the STTConfig object
        self.config = STTConfig(
            language=config.get("language", "tr-TR"),
            sample_rate=config.get("sample_rate", 16000),
            encoding=config.get("encoding", "WEBM_OPUS"),
            enable_punctuation=config.get("enable_punctuation", True),
            interim_results=config.get("interim_results", True),
            single_utterance=False,  # Important for continuous listening
            speech_timeout_ms=config.get("speech_timeout_ms", 2000),
            vad_enabled=config.get("vad_enabled", True),
            noise_reduction_enabled=config.get("noise_reduction_enabled", True),
            noise_reduction_level=config.get("noise_reduction_level", 2),
            enable_word_timestamps=config.get("enable_word_timestamps", False),
            model=config.get("model", "latest_long"),
            use_enhanced=config.get("use_enhanced", True),
        )

        # Start the streaming session; the provider receives the STTConfig
        # object (not the raw dict).
        await self.stt_provider.start_streaming(self.config)
        self.is_streaming = True
        log_info("✅ STT stream manager initialized")

    async def process_chunk(self, audio_chunk: bytes) -> AsyncIterator["TranscriptionResult"]:
        """Send one audio chunk to the provider and yield its results.

        Yields every result produced by ``stt_provider.stream_audio``. For
        results flagged ``is_final`` the manager also records the text and the
        result object. Yields nothing when no session is active.

        If the provider raises, the error is logged and a single placeholder
        result (empty text, zero confidence, ``is_interrupt=True``) is yielded
        instead of propagating the exception.
        """
        if not self.is_streaming or not self.stt_provider:
            log_info("⚠️ STT not streaming or provider not initialized")
            return

        try:
            # Stream audio to the STT provider
            async for result in self.stt_provider.stream_audio(audio_chunk):
                # Only final results update the stored text; interim results
                # are passed through untouched.
                if result.is_final:
                    self.accumulated_text = result.text
                    self.last_final_result = result

                yield result

        except Exception as e:
            log_error("❌ STT processing error", e)
            # Yield an error result so the consumer sees that something
            # happened. NOTE(review): is_interrupt=True appears to be the
            # error signal here - confirm against TranscriptionResult.
            yield TranscriptionResult(
                text="",
                is_final=False,
                confidence=0.0,
                timestamp=datetime.now().timestamp(),
                is_interrupt=True,
            )

    async def stop_streaming(self) -> Optional["TranscriptionResult"]:
        """Stop the active session and return the provider's final result.

        Returns:
            The value returned by ``stt_provider.stop_streaming()``, or
            ``None`` when no session is active or the provider raised (the
            error is logged, not propagated).
        """
        if not self.is_streaming or not self.stt_provider:
            return None

        try:
            # Clear the flag first so the manager counts as stopped even if
            # the provider call below fails.
            self.is_streaming = False
            final_result = await self.stt_provider.stop_streaming()

            if final_result:
                self.accumulated_text = final_result.text
                self.last_final_result = final_result

            log_info("✅ STT streaming stopped")
            return final_result

        except Exception as e:
            log_error("❌ Error stopping STT stream", e)
            return None

    def reset(self):
        """Clear the stored text and last final result.

        The streaming flag, provider and config are left untouched.
        """
        self.accumulated_text = ""
        self.last_final_result = None
        log_info("🔄 STT stream manager reset")

    def get_accumulated_text(self) -> str:
        """Return the text of the most recent final result.

        Returns an empty string if no final result has been seen since
        construction or the last ``reset()``.

        NOTE(review): despite the name, each final result replaces the stored
        text rather than being appended to it. Presumably the provider
        delivers the text callers need in each final result - confirm.
        """
        return self.accumulated_text