Spaces:
Building
Building
""" | |
Real-time STT Manager for streaming transcription | |
""" | |
from typing import AsyncIterator, Optional, Dict, Any | |
import asyncio | |
from datetime import datetime | |
import sys | |
from stt_interface import STTInterface, STTConfig, TranscriptionResult | |
from config_provider import ConfigProvider | |
from logger import log_info, log_error, log_warning, log_debug | |
class STTStreamManager:
    """Manage a real-time (streaming) speech-to-text session.

    Wraps an STT provider: builds the ``STTConfig``, starts the provider's
    streaming session, forwards audio chunks to it and remembers the most
    recent final transcription result.
    """

    def __init__(self):
        # Provider that performs the transcription; set by initialize().
        self.stt_provider: Optional[STTInterface] = None
        # Set to True by initialize() once the provider stream has started,
        # and back to False by stop_streaming().
        self.is_streaming = False
        # STTConfig built by initialize(); None until then.
        self.config = None
        # Text of the most recent final result (overwritten, not appended).
        self.accumulated_text = ""
        # Most recent final result object, or None if there has been none.
        self.last_final_result = None

    async def initialize(self, stt_provider: "STTInterface", config: Dict[str, Any]):
        """Build the STT configuration and start the provider's stream.

        Args:
            stt_provider: Provider used for transcription.
            config: Plain settings dict. Missing keys fall back to the
                defaults spelled out in the ``STTConfig`` call below.

        Raises:
            Anything raised by ``stt_provider.start_streaming``;
            ``is_streaming`` is only set to True after that call returns.
        """
        self.stt_provider = stt_provider

        # Build the STTConfig object from the plain dict.
        self.config = STTConfig(
            language=config.get("language", "tr-TR"),
            sample_rate=config.get("sample_rate", 16000),
            encoding=config.get("encoding", "WEBM_OPUS"),
            enable_punctuation=config.get("enable_punctuation", True),
            interim_results=config.get("interim_results", True),
            single_utterance=False,  # Important for continuous listening
            speech_timeout_ms=config.get("speech_timeout_ms", 2000),
            vad_enabled=config.get("vad_enabled", True),
            noise_reduction_enabled=config.get("noise_reduction_enabled", True),
            noise_reduction_level=config.get("noise_reduction_level", 2),
            enable_word_timestamps=config.get("enable_word_timestamps", False),
            model=config.get("model", "latest_long"),
            use_enhanced=config.get("use_enhanced", True),
        )

        # Start the streaming session; the provider receives the STTConfig
        # object rather than the raw dict.
        await self.stt_provider.start_streaming(self.config)
        self.is_streaming = True
        log_info("✅ STT stream manager initialized")

    async def process_chunk(
        self, audio_chunk: bytes
    ) -> "AsyncIterator[TranscriptionResult]":
        """Send one audio chunk to the provider and yield its results.

        Every result produced by the provider for this chunk is yielded
        (interim and final). Final results additionally update
        ``accumulated_text`` and ``last_final_result``.

        If the manager is not streaming, a warning is logged and nothing is
        yielded. If an exception is raised while streaming, it is logged and
        a single empty, non-final result with ``is_interrupt=True`` is
        yielded instead of propagating the exception.
        """
        if not self.is_streaming or not self.stt_provider:
            # Logged at warning level: audio is being dropped.
            log_warning("⚠️ STT not streaming or provider not initialized")
            return

        try:
            # Stream audio to the STT provider.
            async for result in self.stt_provider.stream_audio(audio_chunk):
                # Remember the latest final result.
                # NOTE(review): this overwrites rather than appends, so
                # get_accumulated_text() returns only the last final text --
                # confirm that is the intended contract.
                if result.is_final:
                    self.accumulated_text = result.text
                    self.last_final_result = result
                yield result
        except Exception as e:
            log_error("❌ STT processing error", e)
            # Signal the failure to the consumer with a placeholder result.
            yield TranscriptionResult(
                text="",
                is_final=False,
                confidence=0.0,
                timestamp=datetime.now().timestamp(),
                is_interrupt=True,
            )

    async def stop_streaming(self) -> "Optional[TranscriptionResult]":
        """Stop the provider stream and return its final result, if any.

        Returns:
            The value returned by the provider's ``stop_streaming``. Returns
            None when the manager is not streaming or when stopping raises
            (the error is logged, not propagated).
        """
        if not self.is_streaming or not self.stt_provider:
            return None

        try:
            # Flip the flag first so process_chunk() rejects new audio while
            # the provider is shutting down.
            self.is_streaming = False
            final_result = await self.stt_provider.stop_streaming()
            if final_result:
                self.accumulated_text = final_result.text
                self.last_final_result = final_result
            log_info("✅ STT streaming stopped")
            return final_result
        except Exception as e:
            log_error("❌ Error stopping STT stream", e)
            return None

    def reset(self):
        """Clear the stored text and last final result.

        Does not touch ``is_streaming``, the provider or the config.
        """
        self.accumulated_text = ""
        self.last_final_result = None
        log_info("🔄 STT stream manager reset")

    def get_accumulated_text(self) -> str:
        """Return the stored transcription text (latest final result's text)."""
        return self.accumulated_text