# Spaces: UcsTurkey / flare (Building)
# File size: 3,800 Bytes
# 299226b

"""
Real-time STT Manager for streaming transcription
"""
from typing import AsyncIterator, Optional, Dict, Any
import asyncio
from datetime import datetime
import sys

from stt_interface import STTInterface, STTConfig, TranscriptionResult
from config_provider import ConfigProvider
from utils import log

class STTStreamManager:
    """Manage one real-time speech-to-text (STT) streaming session.

    The manager wraps an ``STTInterface`` provider: ``initialize`` builds an
    ``STTConfig`` and starts the provider stream, ``process_chunk`` forwards
    audio and yields the provider's transcription results, and
    ``stop_streaming`` ends the stream. The text of every final result is
    appended to a transcript that ``get_accumulated_text`` returns and
    ``reset`` clears.
    """

    def __init__(self):
        # Provider that performs the transcription; set by initialize().
        self.stt_provider: Optional[STTInterface] = None
        # Set to True by a successful initialize(), back to False by
        # stop_streaming().
        self.is_streaming = False
        # STTConfig built by initialize(); None until then.
        self.config = None
        # Space-joined text of the final results recorded since the last
        # reset().
        self.accumulated_text = ""
        # Most recently recorded final TranscriptionResult, or None.
        self.last_final_result = None

    async def initialize(self, stt_provider: STTInterface, config: Dict[str, Any]):
        """Store the provider, build the STT config and start streaming.

        Args:
            stt_provider: Provider whose ``start_streaming``, ``stream_audio``
                and ``stop_streaming`` coroutines/generators will be used.
            config: Optional overrides for the ``STTConfig`` fields read
                below. Missing keys fall back to the defaults shown in the
                code; ``None`` is treated as an empty mapping.

        Raises:
            Any exception raised by ``stt_provider.start_streaming`` is
            propagated; ``is_streaming`` is not set to True in that case.
        """
        # Tolerate a missing config instead of failing on ``None.get``.
        config = config or {}

        self.stt_provider = stt_provider
        self.config = STTConfig(
            language=config.get("language", "tr-TR"),
            sample_rate=config.get("sample_rate", 16000),
            encoding=config.get("encoding", "WEBM_OPUS"),
            enable_punctuation=config.get("enable_punctuation", True),
            interim_results=config.get("interim_results", True),
            single_utterance=False,  # Important for continuous listening
            speech_timeout_ms=config.get("speech_timeout_ms", 2000),
            vad_enabled=config.get("vad_enabled", True),
            noise_reduction_enabled=config.get("noise_reduction_enabled", True),
            noise_reduction_level=config.get("noise_reduction_level", 2)
        )

        # Start streaming session; only flag it as active once this succeeds.
        await self.stt_provider.start_streaming(self.config)
        self.is_streaming = True
        log("✅ STT stream manager initialized")

    def _record_final(self, result: TranscriptionResult) -> None:
        """Append a final result's text to the transcript and remember it."""
        # Skip a result object that was already recorded so it is not counted
        # twice (e.g. if stop_streaming() hands back the same object).
        if result is self.last_final_result:
            return

        text = (result.text or "").strip()
        if text:
            # Join utterances with a single space; strip() covers the case
            # where the transcript is still empty.
            self.accumulated_text = f"{self.accumulated_text} {text}".strip()
        self.last_final_result = result

    async def process_chunk(self, audio_chunk: bytes) -> AsyncIterator[TranscriptionResult]:
        """Send one audio chunk to the provider and yield its results.

        Every result produced by ``stt_provider.stream_audio`` is yielded
        unchanged. Results with ``is_final`` set are additionally appended to
        the accumulated transcript.

        If the manager is not streaming or has no provider, a warning is
        logged and nothing is yielded. If the provider raises, the error is
        logged and a single placeholder result is yielded (empty text,
        ``is_final=False``, ``confidence=0.0``, ``is_interrupt=True``)
        instead of propagating the exception.
        """
        if not self.is_streaming or not self.stt_provider:
            log("⚠️ STT not streaming or provider not initialized")
            return

        try:
            # Stream audio to STT provider
            async for result in self.stt_provider.stream_audio(audio_chunk):
                if result.is_final:
                    self._record_final(result)

                yield result

        except Exception as e:
            log(f"❌ STT processing error: {e}")
            # Yield error result
            yield TranscriptionResult(
                text="",
                is_final=False,
                confidence=0.0,
                timestamp=datetime.now().timestamp(),
                is_interrupt=True
            )

    async def stop_streaming(self) -> Optional[TranscriptionResult]:
        """Stop the provider stream and return its closing result, if any.

        Returns:
            The value returned by ``stt_provider.stop_streaming()`` (also
            appended to the accumulated transcript when truthy), or ``None``
            when the manager was not streaming or the provider raised.
        """
        if not self.is_streaming or not self.stt_provider:
            return None

        try:
            # Cleared before awaiting so the manager is marked as stopped even
            # if the provider call below fails.
            self.is_streaming = False
            final_result = await self.stt_provider.stop_streaming()

            if final_result:
                self._record_final(final_result)

            log("✅ STT streaming stopped")
            return final_result

        except Exception as e:
            log(f"❌ Error stopping STT stream: {e}")
            return None

    def reset(self):
        """Clear the accumulated transcript and the last final result.

        The streaming flag, provider and config are left untouched.
        """
        self.accumulated_text = ""
        self.last_final_result = None
        log("🔄 STT stream manager reset")

    def get_accumulated_text(self) -> str:
        """Return the text of all final results recorded since the last reset."""
        return self.accumulated_text