Spaces:
Building
Building
Update websocket_handler.py
Browse files
- websocket_handler.py +13 -5
websocket_handler.py
CHANGED
@@ -85,19 +85,27 @@ class SilenceDetector:
|
|
85 |
def is_silence(self, audio_chunk: bytes) -> bool:
|
86 |
"""Check if audio chunk is silence"""
|
87 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
88 |
# Convert bytes to numpy array (assuming 16-bit PCM)
|
89 |
audio_data = np.frombuffer(audio_chunk, dtype=np.int16)
|
90 |
|
91 |
-
#
|
92 |
if len(audio_data) == 0:
|
93 |
return True
|
94 |
|
95 |
rms = np.sqrt(np.mean(audio_data.astype(float) ** 2))
|
96 |
-
normalized_rms = rms / 32768.0
|
97 |
-
|
98 |
-
# Audio energy log'unu kaldırdık
|
99 |
|
100 |
return normalized_rms < self.energy_threshold
|
|
|
101 |
except Exception as e:
|
102 |
log_warning(f"Silence detection error: {e}")
|
103 |
return False
|
@@ -212,7 +220,7 @@ class RealtimeSession:
|
|
212 |
stt_config = {
|
213 |
"language": language_code,
|
214 |
"interim_results": config.get("interim_results", True),
|
215 |
-
"single_utterance":
|
216 |
"enable_punctuation": config.get("enable_punctuation", True),
|
217 |
"sample_rate": 16000,
|
218 |
"encoding": "WEBM_OPUS"
|
|
|
85 |
def is_silence(self, audio_chunk: bytes) -> bool:
    """Return True when ``audio_chunk`` is quieter than ``self.energy_threshold``.

    The chunk is interpreted as 16-bit signed PCM samples. Its RMS level is
    divided by 32768 and the result is compared against
    ``self.energy_threshold``.

    NOTE(review): the bytes are assumed to be raw 16-bit PCM. If callers pass
    compressed audio instead, this energy measure is not meaningful -- confirm
    against the caller.

    Args:
        audio_chunk: Raw audio bytes, assumed to be 16-bit PCM.

    Returns:
        True for an empty chunk (or one with no complete sample) and when the
        normalized RMS is below the threshold; False otherwise. Also False,
        after logging a warning, if any error occurs during the computation.
    """
    try:
        # Nothing to analyse: an empty chunk counts as silence.
        if len(audio_chunk) == 0:
            return True

        # int16 samples are 2 bytes each and np.frombuffer rejects buffers
        # whose size is not a multiple of the item size, so drop a dangling
        # trailing byte.
        if len(audio_chunk) % 2 != 0:
            audio_chunk = audio_chunk[:-1]

        # Convert bytes to numpy array (assuming 16-bit PCM).
        audio_data = np.frombuffer(audio_chunk, dtype=np.int16)

        # A single stray byte leaves no complete sample after trimming.
        if len(audio_data) == 0:
            return True

        # Square in floating point so int16 values cannot overflow.
        rms = np.sqrt(np.mean(audio_data.astype(float) ** 2))
        normalized_rms = rms / 32768.0

        # Comparing a NumPy scalar yields numpy.bool_; convert so the result
        # matches the annotated built-in bool.
        return bool(normalized_rms < self.energy_threshold)
    except Exception as e:
        # Best effort: on failure report "not silence" instead of raising.
        log_warning(f"Silence detection error: {e}")
        return False
|
|
|
220 |
stt_config = {
|
221 |
"language": language_code,
|
222 |
"interim_results": config.get("interim_results", True),
|
223 |
+
"single_utterance": True,
|
224 |
"enable_punctuation": config.get("enable_punctuation", True),
|
225 |
"sample_rate": 16000,
|
226 |
"encoding": "WEBM_OPUS"
|