Spaces:
Building
Building
Update stt/stt_lifecycle_manager.py
Browse files - stt/stt_lifecycle_manager.py +19 -14
stt/stt_lifecycle_manager.py
CHANGED
@@ -131,34 +131,39 @@ class STTLifecycleManager:
|
|
131 |
async def _handle_audio_chunk(self, event: Event):
|
132 |
"""Process audio chunk through VAD and collect"""
|
133 |
session_id = event.session_id
|
134 |
-
|
135 |
stt_session = self.stt_sessions.get(session_id)
|
136 |
if not stt_session or not stt_session.is_active:
|
|
|
137 |
return
|
138 |
-
|
139 |
try:
|
140 |
# Decode audio data
|
141 |
audio_data = base64.b64decode(event.data.get("audio_data", ""))
|
142 |
-
|
143 |
# Add to buffer
|
144 |
stt_session.audio_buffer.append(audio_data)
|
145 |
stt_session.total_chunks += 1
|
146 |
stt_session.total_bytes += len(audio_data)
|
147 |
-
|
148 |
# Process through VAD
|
149 |
is_speech, silence_duration_ms = stt_session.vad.process_chunk(audio_data)
|
150 |
-
|
151 |
# Check if utterance ended (silence threshold reached)
|
152 |
if not is_speech and silence_duration_ms >= 2000: # 2 seconds of silence
|
153 |
log_info(f"💬 Utterance ended after {silence_duration_ms}ms silence", session_id=session_id)
|
154 |
|
155 |
-
#
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
|
|
|
|
|
|
|
|
162 |
# Log progress periodically
|
163 |
if stt_session.total_chunks % 100 == 0:
|
164 |
log_debug(
|
@@ -168,14 +173,14 @@ class STTLifecycleManager:
|
|
168 |
bytes=stt_session.total_bytes,
|
169 |
vad_stats=stt_session.vad.get_stats()
|
170 |
)
|
171 |
-
|
172 |
except Exception as e:
|
173 |
log_error(
|
174 |
f"❌ Error processing audio chunk",
|
175 |
session_id=session_id,
|
176 |
error=str(e)
|
177 |
)
|
178 |
-
|
179 |
async def _handle_session_ended(self, event: Event):
|
180 |
"""Clean up STT resources when session ends"""
|
181 |
session_id = event.session_id
|
|
|
131 |
async def _handle_audio_chunk(self, event: Event):
|
132 |
"""Process audio chunk through VAD and collect"""
|
133 |
session_id = event.session_id
|
134 |
+
|
135 |
stt_session = self.stt_sessions.get(session_id)
|
136 |
if not stt_session or not stt_session.is_active:
|
137 |
+
# ✅ Ignore incoming chunks when STT is not active
|
138 |
return
|
139 |
+
|
140 |
try:
|
141 |
# Decode audio data
|
142 |
audio_data = base64.b64decode(event.data.get("audio_data", ""))
|
143 |
+
|
144 |
# Add to buffer
|
145 |
stt_session.audio_buffer.append(audio_data)
|
146 |
stt_session.total_chunks += 1
|
147 |
stt_session.total_bytes += len(audio_data)
|
148 |
+
|
149 |
# Process through VAD
|
150 |
is_speech, silence_duration_ms = stt_session.vad.process_chunk(audio_data)
|
151 |
+
|
152 |
# Check if utterance ended (silence threshold reached)
|
153 |
if not is_speech and silence_duration_ms >= 2000: # 2 seconds of silence
|
154 |
log_info(f"💬 Utterance ended after {silence_duration_ms}ms silence", session_id=session_id)
|
155 |
|
156 |
+
# ✅ Only stop STT if it is still active
|
157 |
+
if stt_session.is_active:
|
158 |
+
stt_session.is_active = False # ✅ Mark inactive immediately so this is not triggered again
|
159 |
+
|
160 |
+
# Stop STT to trigger transcription
|
161 |
+
await self.event_bus.publish(Event(
|
162 |
+
type=EventType.STT_STOPPED,
|
163 |
+
session_id=session_id,
|
164 |
+
data={"reason": "silence_detected"}
|
165 |
+
))
|
166 |
+
|
167 |
# Log progress periodically
|
168 |
if stt_session.total_chunks % 100 == 0:
|
169 |
log_debug(
|
|
|
173 |
bytes=stt_session.total_bytes,
|
174 |
vad_stats=stt_session.vad.get_stats()
|
175 |
)
|
176 |
+
|
177 |
except Exception as e:
|
178 |
log_error(
|
179 |
f"❌ Error processing audio chunk",
|
180 |
session_id=session_id,
|
181 |
error=str(e)
|
182 |
)
|
183 |
+
|
184 |
async def _handle_session_ended(self, event: Event):
|
185 |
"""Clean up STT resources when session ends"""
|
186 |
session_id = event.session_id
|