Spaces:
Building
Building
Update websocket_handler.py
Browse files - websocket_handler.py +9 -26
websocket_handler.py
CHANGED
@@ -600,10 +600,6 @@ async def generate_and_stream_tts(
|
|
600 |
# Generate audio
|
601 |
audio_data = await tts_provider.synthesize(text)
|
602 |
|
603 |
-
# Debug log to check audio data
|
604 |
-
log_debug(f"Audio data type: {type(audio_data)}, length: {len(audio_data)}")
|
605 |
-
log_debug(f"First 10 bytes: {audio_data[:10]}")
|
606 |
-
|
607 |
# Change state to playing
|
608 |
await session.change_state(ConversationState.PLAYING_AUDIO)
|
609 |
await websocket.send_json({
|
@@ -612,46 +608,34 @@ async def generate_and_stream_tts(
|
|
612 |
"to": "playing_audio"
|
613 |
})
|
614 |
|
|
|
|
|
|
|
615 |
# Stream audio in chunks
|
616 |
chunk_size = session.audio_chunk_size
|
617 |
-
|
|
|
618 |
|
619 |
-
for i in range(0,
|
620 |
# Check for cancellation
|
621 |
if asyncio.current_task().cancelled():
|
622 |
break
|
623 |
|
624 |
-
chunk =
|
625 |
chunk_index = i // chunk_size
|
626 |
|
627 |
-
# IMPORTANT: Ensure chunk is bytes before encoding
|
628 |
-
if isinstance(chunk, str):
|
629 |
-
# If chunk is already a string, it might be base64 already
|
630 |
-
chunk_base64 = chunk
|
631 |
-
else:
|
632 |
-
# Convert bytes to base64
|
633 |
-
chunk_base64 = base64.b64encode(chunk).decode('utf-8')
|
634 |
-
|
635 |
await websocket.send_json({
|
636 |
"type": "tts_audio",
|
637 |
-
"data":
|
638 |
"chunk_index": chunk_index,
|
639 |
"total_chunks": total_chunks,
|
640 |
"is_last": chunk_index == total_chunks - 1,
|
641 |
-
"mime_type": "audio/mpeg"
|
642 |
})
|
643 |
|
644 |
# Small delay to prevent overwhelming the client
|
645 |
await asyncio.sleep(0.01)
|
646 |
|
647 |
-
# Send state back to idle after completion
|
648 |
-
await session.change_state(ConversationState.IDLE)
|
649 |
-
await websocket.send_json({
|
650 |
-
"type": "state_change",
|
651 |
-
"from": "playing_audio",
|
652 |
-
"to": "idle"
|
653 |
-
})
|
654 |
-
|
655 |
log_info(
|
656 |
f"TTS streaming completed",
|
657 |
session_id=session.session.session_id,
|
@@ -666,7 +650,6 @@ async def generate_and_stream_tts(
|
|
666 |
log_error(
|
667 |
f"TTS generation error",
|
668 |
error=str(e),
|
669 |
-
traceback=traceback.format_exc(),
|
670 |
session_id=session.session.session_id
|
671 |
)
|
672 |
await websocket.send_json({
|
|
|
600 |
# Generate audio
|
601 |
audio_data = await tts_provider.synthesize(text)
|
602 |
|
|
|
|
|
|
|
|
|
603 |
# Change state to playing
|
604 |
await session.change_state(ConversationState.PLAYING_AUDIO)
|
605 |
await websocket.send_json({
|
|
|
608 |
"to": "playing_audio"
|
609 |
})
|
610 |
|
611 |
+
# Convert audio to base64 for transmission
|
612 |
+
audio_base64 = base64.b64encode(audio_data).decode('utf-8')
|
613 |
+
|
614 |
# Stream audio in chunks
|
615 |
chunk_size = session.audio_chunk_size
|
616 |
+
total_length = len(audio_base64)
|
617 |
+
total_chunks = (total_length + chunk_size - 1) // chunk_size
|
618 |
|
619 |
+
for i in range(0, total_length, chunk_size):
|
620 |
# Check for cancellation
|
621 |
if asyncio.current_task().cancelled():
|
622 |
break
|
623 |
|
624 |
+
chunk = audio_base64[i:i + chunk_size]
|
625 |
chunk_index = i // chunk_size
|
626 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
627 |
await websocket.send_json({
|
628 |
"type": "tts_audio",
|
629 |
+
"data": chunk,
|
630 |
"chunk_index": chunk_index,
|
631 |
"total_chunks": total_chunks,
|
632 |
"is_last": chunk_index == total_chunks - 1,
|
633 |
+
"mime_type": "audio/mpeg" # MP3 format için
|
634 |
})
|
635 |
|
636 |
# Small delay to prevent overwhelming the client
|
637 |
await asyncio.sleep(0.01)
|
638 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
639 |
log_info(
|
640 |
f"TTS streaming completed",
|
641 |
session_id=session.session.session_id,
|
|
|
650 |
log_error(
|
651 |
f"TTS generation error",
|
652 |
error=str(e),
|
|
|
653 |
session_id=session.session.session_id
|
654 |
)
|
655 |
await websocket.send_json({
|