ciyidogan committed on
Commit
52b686b
·
verified ·
1 Parent(s): ee90174

Update stt/stt_lifecycle_manager.py

Browse files
Files changed (1) hide show
  1. stt/stt_lifecycle_manager.py +32 -26
stt/stt_lifecycle_manager.py CHANGED
@@ -133,15 +133,19 @@ class STTLifecycleManager:
133
  session_id = event.session_id
134
 
135
  stt_session = self.stt_sessions.get(session_id)
136
- if not stt_session or not stt_session.is_active:
137
- # STT aktif değilse chunk'ları ignore et
 
 
 
 
138
  return
139
 
140
  try:
141
  # Decode audio data
142
  audio_data = base64.b64decode(event.data.get("audio_data", ""))
143
 
144
- # Add to buffer
145
  stt_session.audio_buffer.append(audio_data)
146
  stt_session.total_chunks += 1
147
  stt_session.total_bytes += len(audio_data)
@@ -153,16 +157,17 @@ class STTLifecycleManager:
153
  if not is_speech and silence_duration_ms >= 2000: # 2 seconds of silence
154
  log_info(f"💬 Utterance ended after {silence_duration_ms}ms silence", session_id=session_id)
155
 
156
- # ✅ STT'yi sadece aktifse durdur
157
- if stt_session.is_active:
158
- stt_session.is_active = False # ✅ Hemen inaktif yap, tekrar trigger olmasın
159
-
160
- # Stop STT to trigger transcription
161
- await self.event_bus.publish(Event(
162
- type=EventType.STT_STOPPED,
163
- session_id=session_id,
164
- data={"reason": "silence_detected"}
165
- ))
 
166
 
167
  # Log progress periodically
168
  if stt_session.total_chunks % 100 == 0:
@@ -199,12 +204,17 @@ class STTLifecycleManager:
199
  return
200
 
201
  try:
202
- if stt_session.is_active and stt_session.audio_buffer:
 
 
 
 
203
  # Combine audio chunks
204
  combined_audio = b''.join(stt_session.audio_buffer)
205
 
206
- # Transcribe using batch mode
207
  log_info(f"📝 Transcribing {len(combined_audio)} bytes of audio", session_id=session_id)
 
 
208
  result = await stt_session.stt_instance.transcribe(
209
  audio_data=combined_audio,
210
  config=stt_session.config
@@ -221,21 +231,17 @@ class STTLifecycleManager:
221
  "confidence": result.confidence
222
  }
223
  ))
 
224
  else:
225
  log_warning(f"⚠️ No transcription result", session_id=session_id)
 
 
 
226
 
227
- # Mark as inactive and reset
228
- stt_session.is_active = False
229
  stt_session.reset()
230
 
231
- # Send STT_STOPPED event
232
- await self.event_bus.publish(Event(
233
- type=EventType.STT_STOPPED,
234
- session_id=session_id,
235
- data={"reason": reason}
236
- ))
237
-
238
- log_info(f"✅ STT stopped", session_id=session_id)
239
 
240
  except Exception as e:
241
  log_error(
@@ -243,7 +249,7 @@ class STTLifecycleManager:
243
  session_id=session_id,
244
  error=str(e)
245
  )
246
-
247
  async def _cleanup_session(self, session_id: str):
248
  """Clean up STT session"""
249
  stt_session = self.stt_sessions.pop(session_id, None)
 
133
  session_id = event.session_id
134
 
135
  stt_session = self.stt_sessions.get(session_id)
136
+ if not stt_session:
137
+ # STT session yoksa chunk'ları ignore et
138
+ return
139
+
140
+ # ✅ STT inaktifse chunk'ları tamamen ignore et
141
+ if not stt_session.is_active:
142
  return
143
 
144
  try:
145
  # Decode audio data
146
  audio_data = base64.b64decode(event.data.get("audio_data", ""))
147
 
148
+ # Add to buffer - sadece aktifken
149
  stt_session.audio_buffer.append(audio_data)
150
  stt_session.total_chunks += 1
151
  stt_session.total_bytes += len(audio_data)
 
157
  if not is_speech and silence_duration_ms >= 2000: # 2 seconds of silence
158
  log_info(f"💬 Utterance ended after {silence_duration_ms}ms silence", session_id=session_id)
159
 
160
+ # ✅ Hemen STT'yi inaktif yap ki daha fazla chunk işlenmesin
161
+ stt_session.is_active = False
162
+
163
+ # ✅ Frontend'e derhal recording durdurmayı söyle
164
+ await self.event_bus.publish(Event(
165
+ type=EventType.STT_STOPPED,
166
+ session_id=session_id,
167
+ data={"reason": "silence_detected", "stop_recording": True}
168
+ ))
169
+
170
+ log_info(f"🛑 STT stopped and frontend notified to stop recording", session_id=session_id)
171
 
172
  # Log progress periodically
173
  if stt_session.total_chunks % 100 == 0:
 
204
  return
205
 
206
  try:
207
+ # STT'yi inaktif yap ki daha fazla chunk işlenmesin
208
+ stt_session.is_active = False
209
+
210
+ # ✅ Transcription sadece audio buffer varsa ve reason silence_detected ise yap
211
+ if reason == "silence_detected" and stt_session.audio_buffer:
212
  # Combine audio chunks
213
  combined_audio = b''.join(stt_session.audio_buffer)
214
 
 
215
  log_info(f"📝 Transcribing {len(combined_audio)} bytes of audio", session_id=session_id)
216
+
217
+ # Transcribe using batch mode
218
  result = await stt_session.stt_instance.transcribe(
219
  audio_data=combined_audio,
220
  config=stt_session.config
 
231
  "confidence": result.confidence
232
  }
233
  ))
234
+ log_info(f"✅ Transcription completed: '{result.text}'", session_id=session_id)
235
  else:
236
  log_warning(f"⚠️ No transcription result", session_id=session_id)
237
+
238
+ elif reason != "silence_detected":
239
+ log_info(f"📝 STT stopped without transcription (reason: {reason})", session_id=session_id)
240
 
241
+ # Reset session for next utterance
 
242
  stt_session.reset()
243
 
244
+ log_info(f"✅ STT session reset and ready for next utterance", session_id=session_id)
 
 
 
 
 
 
 
245
 
246
  except Exception as e:
247
  log_error(
 
249
  session_id=session_id,
250
  error=str(e)
251
  )
252
+
253
  async def _cleanup_session(self, session_id: str):
254
  """Clean up STT session"""
255
  stt_session = self.stt_sessions.pop(session_id, None)