ciyidogan committed on
Commit
2340c2e
·
verified ·
1 Parent(s): 9a02b1d

Update state_orchestrator.py

Browse files
Files changed (1) hide show
  1. state_orchestrator.py +36 -31
state_orchestrator.py CHANGED
@@ -261,39 +261,44 @@ class StateOrchestrator:
261
  return
262
 
263
  current_state = context.state
 
 
 
 
 
 
 
 
 
 
 
264
 
265
  if current_state != ConversationState.LISTENING:
266
  log_warning(
267
  f"⚠️ STT result in unexpected state",
268
  session_id=session_id,
269
- state=current_state.value # .value ekledik
270
  )
271
  return
272
 
273
- result_data = event.data
274
- is_final = result_data.get("is_final", False)
275
 
276
- # Sadece final result'ları logla
277
- if is_final:
278
- text = result_data.get("text", "")
279
- log_info(f"💬 Final transcription: '{text}'", session_id=session_id)
280
-
281
- # Stop STT
282
- await self.event_bus.publish(Event(
283
- type=EventType.STT_STOPPED,
284
- session_id=session_id,
285
- data={"reason": "final_result"}
286
- ))
287
-
288
- # Transition to processing
289
- await self.transition_to(session_id, ConversationState.PROCESSING_SPEECH)
290
-
291
- # Send to LLM
292
- await self.event_bus.publish(Event(
293
- type=EventType.LLM_PROCESSING_STARTED,
294
- session_id=session_id,
295
- data={"text": text}
296
- ))
297
 
298
  async def _handle_llm_response_ready(self, event: Event):
299
  """Handle LLM response"""
@@ -355,20 +360,20 @@ class StateOrchestrator:
355
  log_info(f"🎵 Audio playback completed", session_id=session_id, state=current_state.value)
356
 
357
  if current_state in [ConversationState.PLAYING_WELCOME, ConversationState.PLAYING_RESPONSE]:
358
- # Transition back to listening
359
  await self.transition_to(session_id, ConversationState.LISTENING)
360
-
361
- locale = context.session.locale if hasattr(context.session, 'locale') else 'tr'
362
 
363
- # Start STT
 
364
  await self.event_bus.publish(Event(
365
  type=EventType.STT_STARTED,
366
  session_id=session_id,
367
  data={
368
  "locale": locale,
369
- "interim_results": True,
370
- "vad_enabled": True
371
- },
 
372
  ))
373
 
374
  # Send STT ready signal to frontend
 
261
  return
262
 
263
  current_state = context.state
264
+ result_data = event.data
265
+ is_final = result_data.get("is_final", False)
266
+
267
+ # Sadece final result'ları işle
268
+ if not is_final:
269
+ return
270
+
271
+ text = result_data.get("text", "").strip()
272
+ if not text:
273
+ log_warning(f"⚠️ Empty final transcription", session_id=session_id)
274
+ return
275
 
276
  if current_state != ConversationState.LISTENING:
277
  log_warning(
278
  f"⚠️ STT result in unexpected state",
279
  session_id=session_id,
280
+ state=current_state.value
281
  )
282
  return
283
 
284
+ log_info(f"💬 Final transcription: '{text}'", session_id=session_id)
 
285
 
286
+ # STT'yi otomatik durdur - single utterance modunda zaten duracak ama emin olmak için
287
+ await self.event_bus.publish(Event(
288
+ type=EventType.STT_STOPPED,
289
+ session_id=session_id,
290
+ data={"reason": "utterance_completed"}
291
+ ))
292
+
293
+ # Transition to processing
294
+ await self.transition_to(session_id, ConversationState.PROCESSING_SPEECH)
295
+
296
+ # Send to LLM
297
+ await self.event_bus.publish(Event(
298
+ type=EventType.LLM_PROCESSING_STARTED,
299
+ session_id=session_id,
300
+ data={"text": text}
301
+ ))
 
 
 
 
 
302
 
303
  async def _handle_llm_response_ready(self, event: Event):
304
  """Handle LLM response"""
 
360
  log_info(f"🎵 Audio playback completed", session_id=session_id, state=current_state.value)
361
 
362
  if current_state in [ConversationState.PLAYING_WELCOME, ConversationState.PLAYING_RESPONSE]:
363
+ # Transition to listening
364
  await self.transition_to(session_id, ConversationState.LISTENING)
 
 
365
 
366
+ # STT'yi başlat - tek konuşma modunda
367
+ locale = context.metadata.get("locale", "tr")
368
  await self.event_bus.publish(Event(
369
  type=EventType.STT_STARTED,
370
  session_id=session_id,
371
  data={
372
  "locale": locale,
373
+ "single_utterance": True, # ✅ Tek konuşma modu
374
+ "interim_results": False, # ✅ Sadece final
375
+ "speech_timeout_ms": 2000 # 2 saniye sessizlik
376
+ }
377
  ))
378
 
379
  # Send STT ready signal to frontend