Spaces:

UcsTurkey
/

flare

Running

ciyidogan committed on Jul 6

Commit

668e7b4

verified ·

1 Parent(s): 93dcbd8

Update websocket_handler.py

Files changed (1) hide show

websocket_handler.py CHANGED Viewed

@@ -85,19 +85,27 @@ class SilenceDetector:
     def is_silence(self, audio_chunk: bytes) -> bool:
         """Check if audio chunk is silence.

         The chunk is read as 16-bit samples, its RMS energy is divided by
         32768.0 and the result is compared against ``self.energy_threshold``.

         Args:
             audio_chunk: Raw audio bytes, assumed to be 16-bit PCM.

         Returns:
             True if the chunk contains no samples or its normalized RMS is
             below ``self.energy_threshold``. False otherwise, and also when
             any exception is raised (the error is reported via
             ``log_warning``).

         NOTE(review): an odd-length chunk presumably makes ``np.frombuffer``
         raise, which ends up in the ``except`` branch (returns False) --
         confirm.
         """
         try:
             # Convert bytes to numpy array (assuming 16-bit PCM)
             audio_data = np.frombuffer(audio_chunk, dtype=np.int16)
-            # Calculate RMS energy
             if len(audio_data) == 0:
                 return True
             rms = np.sqrt(np.mean(audio_data.astype(float) ** 2))
-            normalized_rms = rms / 32768.0  # Normalize for 16-bit audio
-            # The audio energy log was removed
             return normalized_rms < self.energy_threshold
         except Exception as e:
             log_warning(f"Silence detection error: {e}")
             return False
@@ -212,7 +220,7 @@ class RealtimeSession:
             stt_config = {
                 "language": language_code,
                 "interim_results": config.get("interim_results", True),
-                "single_utterance": False,
                 "enable_punctuation": config.get("enable_punctuation", True),
                 "sample_rate": 16000,
                 "encoding": "WEBM_OPUS"

     def is_silence(self, audio_chunk: bytes) -> bool:
         """Check if audio chunk is silence.

         An empty chunk counts as silence. A chunk with an odd number of
         bytes has its last byte dropped before it is read as 16-bit samples.
         The RMS energy of the samples is divided by 32768.0 and compared
         against ``self.energy_threshold``.

         Args:
             audio_chunk: Raw audio bytes, assumed to be 16-bit PCM.

         Returns:
             True if the chunk is empty, contains no complete sample, or its
             normalized RMS is below ``self.energy_threshold``. False
             otherwise, and also when any exception is raised (the error is
             reported via ``log_warning``).

         NOTE(review): this assumes raw PCM input; confirm that callers do
         not pass encoded audio (the STT config in this file declares
         "WEBM_OPUS") to this method.
         """
         try:
+            # Check the audio chunk size
+            if len(audio_chunk) == 0:
+                return True
+            # Chunk size must be a multiple of 2 (for 16-bit audio)
+            if len(audio_chunk) % 2 != 0:
+                # If there is a stray odd byte, drop the last byte
+                audio_chunk = audio_chunk[:-1]
             # Convert bytes to numpy array (assuming 16-bit PCM)
             audio_data = np.frombuffer(audio_chunk, dtype=np.int16)
+            # Compute RMS
             if len(audio_data) == 0:
                 return True
             rms = np.sqrt(np.mean(audio_data.astype(float) ** 2))
+            normalized_rms = rms / 32768.0
             return normalized_rms < self.energy_threshold
         except Exception as e:
             log_warning(f"Silence detection error: {e}")
             return False
             stt_config = {
                 "language": language_code,
                 "interim_results": config.get("interim_results", True),
+                "single_utterance": True,
                 "enable_punctuation": config.get("enable_punctuation", True),
                 "sample_rate": 16000,
                 "encoding": "WEBM_OPUS"