ciyidogan committed on
Commit
931d646
·
verified ·
1 Parent(s): 5ab11dd

Update stt/stt_lifecycle_manager.py

Browse files
Files changed (1) hide show
  1. stt/stt_lifecycle_manager.py +19 -14
stt/stt_lifecycle_manager.py CHANGED
@@ -131,34 +131,39 @@ class STTLifecycleManager:
131
  async def _handle_audio_chunk(self, event: Event):
132
  """Process audio chunk through VAD and collect"""
133
  session_id = event.session_id
134
-
135
  stt_session = self.stt_sessions.get(session_id)
136
  if not stt_session or not stt_session.is_active:
 
137
  return
138
-
139
  try:
140
  # Decode audio data
141
  audio_data = base64.b64decode(event.data.get("audio_data", ""))
142
-
143
  # Add to buffer
144
  stt_session.audio_buffer.append(audio_data)
145
  stt_session.total_chunks += 1
146
  stt_session.total_bytes += len(audio_data)
147
-
148
  # Process through VAD
149
  is_speech, silence_duration_ms = stt_session.vad.process_chunk(audio_data)
150
-
151
  # Check if utterance ended (silence threshold reached)
152
  if not is_speech and silence_duration_ms >= 2000: # 2 seconds of silence
153
  log_info(f"💬 Utterance ended after {silence_duration_ms}ms silence", session_id=session_id)
154
 
155
- # Stop STT to trigger transcription
156
- await self.event_bus.publish(Event(
157
- type=EventType.STT_STOPPED,
158
- session_id=session_id,
159
- data={"reason": "silence_detected"}
160
- ))
161
-
 
 
 
 
162
  # Log progress periodically
163
  if stt_session.total_chunks % 100 == 0:
164
  log_debug(
@@ -168,14 +173,14 @@ class STTLifecycleManager:
168
  bytes=stt_session.total_bytes,
169
  vad_stats=stt_session.vad.get_stats()
170
  )
171
-
172
  except Exception as e:
173
  log_error(
174
  f"❌ Error processing audio chunk",
175
  session_id=session_id,
176
  error=str(e)
177
  )
178
-
179
  async def _handle_session_ended(self, event: Event):
180
  """Clean up STT resources when session ends"""
181
  session_id = event.session_id
 
131
  async def _handle_audio_chunk(self, event: Event):
132
  """Process audio chunk through VAD and collect"""
133
  session_id = event.session_id
134
+
135
  stt_session = self.stt_sessions.get(session_id)
136
  if not stt_session or not stt_session.is_active:
137
+ # ✅ Ignore chunks when STT is not active
138
  return
139
+
140
  try:
141
  # Decode audio data
142
  audio_data = base64.b64decode(event.data.get("audio_data", ""))
143
+
144
  # Add to buffer
145
  stt_session.audio_buffer.append(audio_data)
146
  stt_session.total_chunks += 1
147
  stt_session.total_bytes += len(audio_data)
148
+
149
  # Process through VAD
150
  is_speech, silence_duration_ms = stt_session.vad.process_chunk(audio_data)
151
+
152
  # Check if utterance ended (silence threshold reached)
153
  if not is_speech and silence_duration_ms >= 2000: # 2 seconds of silence
154
  log_info(f"💬 Utterance ended after {silence_duration_ms}ms silence", session_id=session_id)
155
 
156
+ # Only stop STT if it is still active
157
+ if stt_session.is_active:
158
+ stt_session.is_active = False # ✅ Hemen inaktif yap, tekrar trigger olmasın
159
+
160
+ # Stop STT to trigger transcription
161
+ await self.event_bus.publish(Event(
162
+ type=EventType.STT_STOPPED,
163
+ session_id=session_id,
164
+ data={"reason": "silence_detected"}
165
+ ))
166
+
167
  # Log progress periodically
168
  if stt_session.total_chunks % 100 == 0:
169
  log_debug(
 
173
  bytes=stt_session.total_bytes,
174
  vad_stats=stt_session.vad.get_stats()
175
  )
176
+
177
  except Exception as e:
178
  log_error(
179
  f"❌ Error processing audio chunk",
180
  session_id=session_id,
181
  error=str(e)
182
  )
183
+
184
  async def _handle_session_ended(self, event: Event):
185
  """Clean up STT resources when session ends"""
186
  session_id = event.session_id