# STT/modules/routers/transcription.py
# goldpulpy's picture
# Upload space
# 13f6d73
from fastapi import APIRouter, WebSocket, WebSocketDisconnect
from starlette.websockets import WebSocketState

from modules.models.speech_recognizer import SpeechRecognizer
# Router that collects this module's WebSocket endpoints; the "/transcribe"
# handler below registers itself on it via the @router.websocket decorator.
router = APIRouter()
async def _send_transcript(websocket: WebSocket, text: str, is_complete: bool) -> None:
    """Send *text* to the client as JSON unless it is empty or whitespace-only.

    The payload has the shape ``{"text": <str>, "is_complete": <bool>}``.
    """
    if not text or text.isspace():
        return
    kind = "complete" if is_complete else "partial"
    print(f"Sending {kind} message: {text}")
    await websocket.send_json({"text": text, "is_complete": is_complete})


@router.websocket("/transcribe")
async def transcribe_audio(websocket: WebSocket):
    """Stream transcription results for audio received over a WebSocket.

    The client sends binary audio frames. Each frame is appended to a
    per-connection ``SpeechRecognizer`` and processed. While the recognizer
    reports the ``'voice'`` status, the accumulated text is sent as a partial
    message (``is_complete=False``). When it reports ``'nonvoice'``, the
    recognizer is flushed, the accumulated text is sent as a complete message
    (``is_complete=True``) and the accumulator is reset.

    The handler returns when the client disconnects or an error occurs; the
    recognizer buffer is always cleared and the socket is closed if it is
    still open.
    """
    # NOTE(review): a recognizer is built for every connection; if the
    # constructor loads model weights this is expensive — confirm whether
    # SpeechRecognizer caches the model internally.
    recognizer = SpeechRecognizer("ru", "deepdml/faster-whisper-large-v3-turbo-ct2")
    await websocket.accept()

    message_buffer = ""
    last_status = None
    live_states = (WebSocketState.CONNECTING, WebSocketState.CONNECTED)

    try:
        while websocket.client_state in live_states:
            # Receive audio data
            audio = await websocket.receive_bytes()
            recognizer.append_audio(audio)
            recognized_text = recognizer.process_buffer()
            status = recognizer.get_status()

            # Nothing new to report: same status and no freshly recognized text.
            if status == last_status and recognized_text[0] is None:
                continue
            last_status = status

            if status == 'voice':
                # The text slot may be None when the status changed but no
                # text was produced; treat that as an empty fragment.
                message_buffer += recognized_text[2] or ""
                await _send_transcript(websocket, message_buffer, is_complete=False)
            elif status == 'nonvoice':
                message_buffer += recognizer.flush()[2] or ""
                await _send_transcript(websocket, message_buffer, is_complete=True)
                # Start the next utterance from a clean buffer, even when the
                # flushed text was whitespace-only and therefore not sent.
                message_buffer = ""
    except WebSocketDisconnect as e:
        # Normal termination: receive_bytes() raises this when the peer closes.
        print(f"Client disconnected: code={e.code}")
    except (RuntimeError, ConnectionError) as e:
        # Sending/receiving on a socket that is no longer usable.
        print(f"Client disconnected: {e}")
    except Exception as e:
        # Top-level boundary for this connection: report and fall through to cleanup.
        print(f"Unexpected error: {e}")
    finally:
        # Ensure proper cleanup however the loop exits.
        recognizer.clear_buffer()
        still_open = (
            websocket.client_state != WebSocketState.DISCONNECTED
            and websocket.application_state != WebSocketState.DISCONNECTED
        )
        # Closing an already-closed socket raises, so only close when neither
        # side has disconnected yet.
        if still_open:
            try:
                await websocket.close()
            except Exception as e:
                print(f"Error closing websocket: {e}")