ciyidogan commited on
Commit
114bc80
Β·
verified Β·
1 Parent(s): 605e58b

Upload tts_lifecycle_manager.py

Browse files
Files changed (1) hide show
  1. tts/tts_lifecycle_manager.py +57 -57
tts/tts_lifecycle_manager.py CHANGED
@@ -9,8 +9,8 @@ from datetime import datetime
9
  import traceback
10
  import base64
11
 
12
- from event_bus import EventBus, Event, EventType, publish_error
13
- from resource_manager import ResourceManager, ResourceType
14
  from tts.tts_factory import TTSFactory
15
  from tts.tts_interface import TTSInterface
16
  from tts.tts_preprocessor import TTSPreprocessor
@@ -19,7 +19,7 @@ from utils.logger import log_info, log_error, log_debug, log_warning
19
 
20
  class TTSJob:
21
  """TTS synthesis job"""
22
-
23
  def __init__(self, job_id: str, session_id: str, text: str, is_welcome: bool = False):
24
  self.job_id = job_id
25
  self.session_id = session_id
@@ -30,12 +30,12 @@ class TTSJob:
30
  self.audio_data: Optional[bytes] = None
31
  self.error: Optional[str] = None
32
  self.chunks_sent = 0
33
-
34
  def complete(self, audio_data: bytes):
35
  """Mark job as completed"""
36
  self.audio_data = audio_data
37
  self.completed_at = datetime.utcnow()
38
-
39
  def fail(self, error: str):
40
  """Mark job as failed"""
41
  self.error = error
@@ -44,7 +44,7 @@ class TTSJob:
44
 
45
  class TTSSession:
46
  """TTS session wrapper"""
47
-
48
  def __init__(self, session_id: str, tts_instance: TTSInterface):
49
  self.session_id = session_id
50
  self.tts_instance = tts_instance
@@ -55,7 +55,7 @@ class TTSSession:
55
  self.last_activity = datetime.utcnow()
56
  self.total_jobs = 0
57
  self.total_chars = 0
58
-
59
  def update_activity(self):
60
  """Update last activity timestamp"""
61
  self.last_activity = datetime.utcnow()
@@ -63,7 +63,7 @@ class TTSSession:
63
 
64
  class TTSLifecycleManager:
65
  """Manages TTS instances lifecycle"""
66
-
67
  def __init__(self, event_bus: EventBus, resource_manager: ResourceManager):
68
  self.event_bus = event_bus
69
  self.resource_manager = resource_manager
@@ -71,12 +71,12 @@ class TTSLifecycleManager:
71
  self.chunk_size = 16384 # 16KB chunks for base64
72
  self._setup_event_handlers()
73
  self._setup_resource_pool()
74
-
75
  def _setup_event_handlers(self):
76
  """Subscribe to TTS-related events"""
77
  self.event_bus.subscribe(EventType.TTS_STARTED, self._handle_tts_start)
78
  self.event_bus.subscribe(EventType.SESSION_ENDED, self._handle_session_ended)
79
-
80
  def _setup_resource_pool(self):
81
  """Setup TTS instance pool"""
82
  self.resource_manager.register_pool(
@@ -85,7 +85,7 @@ class TTSLifecycleManager:
85
  max_idle=3,
86
  max_age_seconds=600 # 10 minutes
87
  )
88
-
89
  async def _create_tts_instance(self) -> Optional[TTSInterface]:
90
  """Factory for creating TTS instances"""
91
  try:
@@ -93,24 +93,24 @@ class TTSLifecycleManager:
93
  if not tts_instance:
94
  log_warning("⚠️ No TTS provider configured")
95
  return None
96
-
97
  log_debug("πŸ”Š Created new TTS instance")
98
  return tts_instance
99
-
100
  except Exception as e:
101
  log_error(f"❌ Failed to create TTS instance", error=str(e))
102
  return None
103
-
104
  async def _handle_tts_start(self, event: Event):
105
  """Handle TTS synthesis request"""
106
  session_id = event.session_id
107
  text = event.data.get("text", "")
108
  is_welcome = event.data.get("is_welcome", False)
109
-
110
  if not text:
111
  log_warning(f"⚠️ Empty text for TTS", session_id=session_id)
112
  return
113
-
114
  try:
115
  log_info(
116
  f"πŸ”Š Starting TTS",
@@ -118,7 +118,7 @@ class TTSLifecycleManager:
118
  text_length=len(text),
119
  is_welcome=is_welcome
120
  )
121
-
122
  # Get or create session
123
  if session_id not in self.tts_sessions:
124
  # Acquire TTS instance from pool
@@ -129,23 +129,23 @@ class TTSLifecycleManager:
129
  resource_type=ResourceType.TTS_INSTANCE,
130
  cleanup_callback=self._cleanup_tts_instance
131
  )
132
-
133
  if not tts_instance:
134
  # No TTS available
135
  await self._handle_no_tts(session_id, text, is_welcome)
136
  return
137
-
138
  # Create session
139
  tts_session = TTSSession(session_id, tts_instance)
140
-
141
  # Get locale from event data or default
142
  locale = event.data.get("locale", "tr")
143
  tts_session.preprocessor = TTSPreprocessor(language=locale)
144
-
145
  self.tts_sessions[session_id] = tts_session
146
  else:
147
  tts_session = self.tts_sessions[session_id]
148
-
149
  # Create job
150
  job_id = f"{session_id}_{tts_session.total_jobs}"
151
  job = TTSJob(job_id, session_id, text, is_welcome)
@@ -153,10 +153,10 @@ class TTSLifecycleManager:
153
  tts_session.total_jobs += 1
154
  tts_session.total_chars += len(text)
155
  tts_session.update_activity()
156
-
157
  # Process TTS
158
  await self._process_tts_job(tts_session, job)
159
-
160
  except Exception as e:
161
  log_error(
162
  f"❌ Failed to start TTS",
@@ -164,14 +164,14 @@ class TTSLifecycleManager:
164
  error=str(e),
165
  traceback=traceback.format_exc()
166
  )
167
-
168
  # Publish error event
169
  await publish_error(
170
  session_id=session_id,
171
  error_type="tts_error",
172
  error_message=f"Failed to synthesize speech: {str(e)}"
173
  )
174
-
175
  async def _process_tts_job(self, tts_session: TTSSession, job: TTSJob):
176
  """Process a TTS job"""
177
  try:
@@ -180,43 +180,43 @@ class TTSLifecycleManager:
180
  job.text,
181
  tts_session.tts_instance.get_preprocessing_flags()
182
  )
183
-
184
  log_debug(
185
  f"πŸ“ TTS preprocessed",
186
  session_id=job.session_id,
187
  original_length=len(job.text),
188
  processed_length=len(processed_text)
189
  )
190
-
191
  # Synthesize audio
192
  audio_data = await tts_session.tts_instance.synthesize(processed_text)
193
-
194
  if not audio_data:
195
  raise ValueError("TTS returned empty audio data")
196
-
197
  job.complete(audio_data)
198
-
199
  log_info(
200
  f"βœ… TTS synthesis complete",
201
  session_id=job.session_id,
202
  audio_size=len(audio_data),
203
  duration_ms=(datetime.utcnow() - job.created_at).total_seconds() * 1000
204
  )
205
-
206
  # Stream audio chunks
207
  await self._stream_audio_chunks(tts_session, job)
208
-
209
  # Move to completed
210
  tts_session.active_jobs.pop(job.job_id, None)
211
  tts_session.completed_jobs.append(job)
212
-
213
  # Keep only last 10 completed jobs
214
  if len(tts_session.completed_jobs) > 10:
215
  tts_session.completed_jobs.pop(0)
216
-
217
  except Exception as e:
218
  job.fail(str(e))
219
-
220
  # Handle specific TTS errors
221
  error_message = str(e)
222
  if "quota" in error_message.lower() or "limit" in error_message.lower():
@@ -237,17 +237,17 @@ class TTSLifecycleManager:
237
  error_type="tts_error",
238
  error_message=error_message
239
  )
240
-
241
  async def _stream_audio_chunks(self, tts_session: TTSSession, job: TTSJob):
242
  """Stream audio data as chunks"""
243
  if not job.audio_data:
244
  return
245
-
246
  # Convert to base64
247
  audio_base64 = base64.b64encode(job.audio_data).decode('utf-8')
248
  total_length = len(audio_base64)
249
  total_chunks = (total_length + self.chunk_size - 1) // self.chunk_size
250
-
251
  log_debug(
252
  f"πŸ“€ Streaming TTS audio",
253
  session_id=job.session_id,
@@ -255,13 +255,13 @@ class TTSLifecycleManager:
255
  base64_size=total_length,
256
  chunks=total_chunks
257
  )
258
-
259
  # Stream chunks
260
  for i in range(0, total_length, self.chunk_size):
261
  chunk = audio_base64[i:i + self.chunk_size]
262
  chunk_index = i // self.chunk_size
263
  is_last = chunk_index == total_chunks - 1
264
-
265
  await self.event_bus.publish(Event(
266
  type=EventType.TTS_CHUNK_READY,
267
  session_id=job.session_id,
@@ -275,12 +275,12 @@ class TTSLifecycleManager:
275
  },
276
  priority=8 # Higher priority for audio chunks
277
  ))
278
-
279
  job.chunks_sent += 1
280
-
281
  # Small delay between chunks to prevent overwhelming
282
  await asyncio.sleep(0.01)
283
-
284
  # Notify completion
285
  await self.event_bus.publish(Event(
286
  type=EventType.TTS_COMPLETED,
@@ -291,17 +291,17 @@ class TTSLifecycleManager:
291
  "is_welcome": job.is_welcome
292
  }
293
  ))
294
-
295
  log_info(
296
  f"βœ… TTS streaming complete",
297
  session_id=job.session_id,
298
  chunks_sent=job.chunks_sent
299
  )
300
-
301
  async def _handle_no_tts(self, session_id: str, text: str, is_welcome: bool):
302
  """Handle case when TTS is not available"""
303
  log_warning(f"⚠️ No TTS available, skipping audio generation", session_id=session_id)
304
-
305
  # Just notify completion without audio
306
  await self.event_bus.publish(Event(
307
  type=EventType.TTS_COMPLETED,
@@ -312,51 +312,51 @@ class TTSLifecycleManager:
312
  "is_welcome": is_welcome
313
  }
314
  ))
315
-
316
  async def _handle_session_ended(self, event: Event):
317
  """Clean up TTS resources when session ends"""
318
  session_id = event.session_id
319
  await self._cleanup_session(session_id)
320
-
321
  async def _cleanup_session(self, session_id: str):
322
  """Clean up TTS session"""
323
  tts_session = self.tts_sessions.pop(session_id, None)
324
  if not tts_session:
325
  return
326
-
327
  try:
328
  # Cancel any active jobs
329
  for job in tts_session.active_jobs.values():
330
  if not job.completed_at:
331
  job.fail("Session ended")
332
-
333
  # Release resource
334
  resource_id = f"tts_{session_id}"
335
  await self.resource_manager.release(resource_id, delay_seconds=120)
336
-
337
  log_info(
338
  f"🧹 TTS session cleaned up",
339
  session_id=session_id,
340
  total_jobs=tts_session.total_jobs,
341
  total_chars=tts_session.total_chars
342
  )
343
-
344
  except Exception as e:
345
  log_error(
346
  f"❌ Error cleaning up TTS session",
347
  session_id=session_id,
348
  error=str(e)
349
  )
350
-
351
  async def _cleanup_tts_instance(self, tts_instance: TTSInterface):
352
  """Cleanup callback for TTS instance"""
353
  try:
354
  # TTS instances typically don't need special cleanup
355
  log_debug("🧹 TTS instance cleaned up")
356
-
357
  except Exception as e:
358
  log_error(f"❌ Error cleaning up TTS instance", error=str(e))
359
-
360
  def get_stats(self) -> Dict[str, Any]:
361
  """Get TTS manager statistics"""
362
  session_stats = {}
@@ -369,7 +369,7 @@ class TTSLifecycleManager:
369
  "uptime_seconds": (datetime.utcnow() - tts_session.created_at).total_seconds(),
370
  "last_activity": tts_session.last_activity.isoformat()
371
  }
372
-
373
  return {
374
  "active_sessions": len(self.tts_sessions),
375
  "total_active_jobs": sum(len(s.active_jobs) for s in self.tts_sessions.values()),
 
9
  import traceback
10
  import base64
11
 
12
+ from chat_session.event_bus import EventBus, Event, EventType, publish_error
13
+ from chat_session.resource_manager import ResourceManager, ResourceType
14
  from tts.tts_factory import TTSFactory
15
  from tts.tts_interface import TTSInterface
16
  from tts.tts_preprocessor import TTSPreprocessor
 
19
 
20
  class TTSJob:
21
  """TTS synthesis job"""
22
+
23
  def __init__(self, job_id: str, session_id: str, text: str, is_welcome: bool = False):
24
  self.job_id = job_id
25
  self.session_id = session_id
 
30
  self.audio_data: Optional[bytes] = None
31
  self.error: Optional[str] = None
32
  self.chunks_sent = 0
33
+
34
  def complete(self, audio_data: bytes):
35
  """Mark job as completed"""
36
  self.audio_data = audio_data
37
  self.completed_at = datetime.utcnow()
38
+
39
  def fail(self, error: str):
40
  """Mark job as failed"""
41
  self.error = error
 
44
 
45
  class TTSSession:
46
  """TTS session wrapper"""
47
+
48
  def __init__(self, session_id: str, tts_instance: TTSInterface):
49
  self.session_id = session_id
50
  self.tts_instance = tts_instance
 
55
  self.last_activity = datetime.utcnow()
56
  self.total_jobs = 0
57
  self.total_chars = 0
58
+
59
  def update_activity(self):
60
  """Update last activity timestamp"""
61
  self.last_activity = datetime.utcnow()
 
63
 
64
  class TTSLifecycleManager:
65
  """Manages TTS instances lifecycle"""
66
+
67
  def __init__(self, event_bus: EventBus, resource_manager: ResourceManager):
68
  self.event_bus = event_bus
69
  self.resource_manager = resource_manager
 
71
  self.chunk_size = 16384 # 16KB chunks for base64
72
  self._setup_event_handlers()
73
  self._setup_resource_pool()
74
+
75
  def _setup_event_handlers(self):
76
  """Subscribe to TTS-related events"""
77
  self.event_bus.subscribe(EventType.TTS_STARTED, self._handle_tts_start)
78
  self.event_bus.subscribe(EventType.SESSION_ENDED, self._handle_session_ended)
79
+
80
  def _setup_resource_pool(self):
81
  """Setup TTS instance pool"""
82
  self.resource_manager.register_pool(
 
85
  max_idle=3,
86
  max_age_seconds=600 # 10 minutes
87
  )
88
+
89
  async def _create_tts_instance(self) -> Optional[TTSInterface]:
90
  """Factory for creating TTS instances"""
91
  try:
 
93
  if not tts_instance:
94
  log_warning("⚠️ No TTS provider configured")
95
  return None
96
+
97
  log_debug("πŸ”Š Created new TTS instance")
98
  return tts_instance
99
+
100
  except Exception as e:
101
  log_error(f"❌ Failed to create TTS instance", error=str(e))
102
  return None
103
+
104
  async def _handle_tts_start(self, event: Event):
105
  """Handle TTS synthesis request"""
106
  session_id = event.session_id
107
  text = event.data.get("text", "")
108
  is_welcome = event.data.get("is_welcome", False)
109
+
110
  if not text:
111
  log_warning(f"⚠️ Empty text for TTS", session_id=session_id)
112
  return
113
+
114
  try:
115
  log_info(
116
  f"πŸ”Š Starting TTS",
 
118
  text_length=len(text),
119
  is_welcome=is_welcome
120
  )
121
+
122
  # Get or create session
123
  if session_id not in self.tts_sessions:
124
  # Acquire TTS instance from pool
 
129
  resource_type=ResourceType.TTS_INSTANCE,
130
  cleanup_callback=self._cleanup_tts_instance
131
  )
132
+
133
  if not tts_instance:
134
  # No TTS available
135
  await self._handle_no_tts(session_id, text, is_welcome)
136
  return
137
+
138
  # Create session
139
  tts_session = TTSSession(session_id, tts_instance)
140
+
141
  # Get locale from event data or default
142
  locale = event.data.get("locale", "tr")
143
  tts_session.preprocessor = TTSPreprocessor(language=locale)
144
+
145
  self.tts_sessions[session_id] = tts_session
146
  else:
147
  tts_session = self.tts_sessions[session_id]
148
+
149
  # Create job
150
  job_id = f"{session_id}_{tts_session.total_jobs}"
151
  job = TTSJob(job_id, session_id, text, is_welcome)
 
153
  tts_session.total_jobs += 1
154
  tts_session.total_chars += len(text)
155
  tts_session.update_activity()
156
+
157
  # Process TTS
158
  await self._process_tts_job(tts_session, job)
159
+
160
  except Exception as e:
161
  log_error(
162
  f"❌ Failed to start TTS",
 
164
  error=str(e),
165
  traceback=traceback.format_exc()
166
  )
167
+
168
  # Publish error event
169
  await publish_error(
170
  session_id=session_id,
171
  error_type="tts_error",
172
  error_message=f"Failed to synthesize speech: {str(e)}"
173
  )
174
+
175
  async def _process_tts_job(self, tts_session: TTSSession, job: TTSJob):
176
  """Process a TTS job"""
177
  try:
 
180
  job.text,
181
  tts_session.tts_instance.get_preprocessing_flags()
182
  )
183
+
184
  log_debug(
185
  f"πŸ“ TTS preprocessed",
186
  session_id=job.session_id,
187
  original_length=len(job.text),
188
  processed_length=len(processed_text)
189
  )
190
+
191
  # Synthesize audio
192
  audio_data = await tts_session.tts_instance.synthesize(processed_text)
193
+
194
  if not audio_data:
195
  raise ValueError("TTS returned empty audio data")
196
+
197
  job.complete(audio_data)
198
+
199
  log_info(
200
  f"βœ… TTS synthesis complete",
201
  session_id=job.session_id,
202
  audio_size=len(audio_data),
203
  duration_ms=(datetime.utcnow() - job.created_at).total_seconds() * 1000
204
  )
205
+
206
  # Stream audio chunks
207
  await self._stream_audio_chunks(tts_session, job)
208
+
209
  # Move to completed
210
  tts_session.active_jobs.pop(job.job_id, None)
211
  tts_session.completed_jobs.append(job)
212
+
213
  # Keep only last 10 completed jobs
214
  if len(tts_session.completed_jobs) > 10:
215
  tts_session.completed_jobs.pop(0)
216
+
217
  except Exception as e:
218
  job.fail(str(e))
219
+
220
  # Handle specific TTS errors
221
  error_message = str(e)
222
  if "quota" in error_message.lower() or "limit" in error_message.lower():
 
237
  error_type="tts_error",
238
  error_message=error_message
239
  )
240
+
241
  async def _stream_audio_chunks(self, tts_session: TTSSession, job: TTSJob):
242
  """Stream audio data as chunks"""
243
  if not job.audio_data:
244
  return
245
+
246
  # Convert to base64
247
  audio_base64 = base64.b64encode(job.audio_data).decode('utf-8')
248
  total_length = len(audio_base64)
249
  total_chunks = (total_length + self.chunk_size - 1) // self.chunk_size
250
+
251
  log_debug(
252
  f"πŸ“€ Streaming TTS audio",
253
  session_id=job.session_id,
 
255
  base64_size=total_length,
256
  chunks=total_chunks
257
  )
258
+
259
  # Stream chunks
260
  for i in range(0, total_length, self.chunk_size):
261
  chunk = audio_base64[i:i + self.chunk_size]
262
  chunk_index = i // self.chunk_size
263
  is_last = chunk_index == total_chunks - 1
264
+
265
  await self.event_bus.publish(Event(
266
  type=EventType.TTS_CHUNK_READY,
267
  session_id=job.session_id,
 
275
  },
276
  priority=8 # Higher priority for audio chunks
277
  ))
278
+
279
  job.chunks_sent += 1
280
+
281
  # Small delay between chunks to prevent overwhelming
282
  await asyncio.sleep(0.01)
283
+
284
  # Notify completion
285
  await self.event_bus.publish(Event(
286
  type=EventType.TTS_COMPLETED,
 
291
  "is_welcome": job.is_welcome
292
  }
293
  ))
294
+
295
  log_info(
296
  f"βœ… TTS streaming complete",
297
  session_id=job.session_id,
298
  chunks_sent=job.chunks_sent
299
  )
300
+
301
  async def _handle_no_tts(self, session_id: str, text: str, is_welcome: bool):
302
  """Handle case when TTS is not available"""
303
  log_warning(f"⚠️ No TTS available, skipping audio generation", session_id=session_id)
304
+
305
  # Just notify completion without audio
306
  await self.event_bus.publish(Event(
307
  type=EventType.TTS_COMPLETED,
 
312
  "is_welcome": is_welcome
313
  }
314
  ))
315
+
316
  async def _handle_session_ended(self, event: Event):
317
  """Clean up TTS resources when session ends"""
318
  session_id = event.session_id
319
  await self._cleanup_session(session_id)
320
+
321
  async def _cleanup_session(self, session_id: str):
322
  """Clean up TTS session"""
323
  tts_session = self.tts_sessions.pop(session_id, None)
324
  if not tts_session:
325
  return
326
+
327
  try:
328
  # Cancel any active jobs
329
  for job in tts_session.active_jobs.values():
330
  if not job.completed_at:
331
  job.fail("Session ended")
332
+
333
  # Release resource
334
  resource_id = f"tts_{session_id}"
335
  await self.resource_manager.release(resource_id, delay_seconds=120)
336
+
337
  log_info(
338
  f"🧹 TTS session cleaned up",
339
  session_id=session_id,
340
  total_jobs=tts_session.total_jobs,
341
  total_chars=tts_session.total_chars
342
  )
343
+
344
  except Exception as e:
345
  log_error(
346
  f"❌ Error cleaning up TTS session",
347
  session_id=session_id,
348
  error=str(e)
349
  )
350
+
351
  async def _cleanup_tts_instance(self, tts_instance: TTSInterface):
352
  """Cleanup callback for TTS instance"""
353
  try:
354
  # TTS instances typically don't need special cleanup
355
  log_debug("🧹 TTS instance cleaned up")
356
+
357
  except Exception as e:
358
  log_error(f"❌ Error cleaning up TTS instance", error=str(e))
359
+
360
  def get_stats(self) -> Dict[str, Any]:
361
  """Get TTS manager statistics"""
362
  session_stats = {}
 
369
  "uptime_seconds": (datetime.utcnow() - tts_session.created_at).total_seconds(),
370
  "last_activity": tts_session.last_activity.isoformat()
371
  }
372
+
373
  return {
374
  "active_sessions": len(self.tts_sessions),
375
  "total_active_jobs": sum(len(s.active_jobs) for s in self.tts_sessions.values()),