qwen-phone-chat / app.py
freddyaboulton's picture
Update app.py
18dbd32 verified
import asyncio
import base64
import json
import os
import secrets
from pathlib import Path
import gradio as gr
import numpy as np
from dotenv import load_dotenv
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse, StreamingResponse
from fastrtc import (
AdditionalOutputs,
AsyncStreamHandler,
Stream,
get_cloudflare_turn_credentials_async,
wait_for_item,
)
from gradio.utils import get_space
from websockets.asyncio.client import connect
# Load variables from a .env file (if any) before the environment is read below.
load_dotenv()

# Directory that contains this module.
cur_dir = Path(__file__).parent

# Credential for the realtime endpoint; an empty string when the variable is unset.
API_KEY = os.environ.get("MODELSCOPE_API_KEY", "")

# WebSocket endpoint of the realtime model.
API_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime?model=qwen-omni-turbo-realtime-2025-03-26"

# Voice names offered in the UI dropdown.
VOICES = ["Chelsie", "Serena", "Ethan", "Cherry"]

# Headers sent with the WebSocket handshake.
headers = {"Authorization": f"Bearer {API_KEY}"}
class QwenOmniHandler(AsyncStreamHandler):
    """Relay audio between a fastrtc stream and the realtime WebSocket API.

    Incoming frames are base64-encoded and appended to the remote input
    audio buffer; ``response.audio.delta`` events coming back are decoded
    to int16 PCM and queued for playback.

    Args:
        voice_id: Voice name sent in the ``session.update`` message.
            Defaults to ``"Serena"``, the value that used to be hard-coded.
    """

    def __init__(self, voice_id: str = "Serena") -> None:
        super().__init__(
            expected_layout="mono",
            output_sample_rate=24_000,
            input_sample_rate=16_000,
        )
        self.voice_id = voice_id
        # Both attributes refer to the open WebSocket; ``client`` is kept for
        # compatibility with code that may read it, ``connection`` is the one
        # the methods below rely on. They are None while disconnected.
        self.client = None
        self.connection = None
        self.output_queue = asyncio.Queue()

    def copy(self):
        """Return a fresh handler configured with the same voice."""
        return QwenOmniHandler(voice_id=self.voice_id)

    @staticmethod
    def msg_id() -> str:
        """Return a random event id of the form ``event_<20 hex chars>``."""
        return f"event_{secrets.token_hex(10)}"

    async def start_up(
        self,
    ):
        """Connect to the realtime API and pump server events until it closes.

        Sends a ``session.update`` message first, then iterates over incoming
        messages: a ``input_audio_buffer.speech_started`` event clears the
        pending playback queue (interruption), and every
        ``response.audio.delta`` event is decoded and pushed to
        ``output_queue`` as ``(sample_rate, int16 array of shape (1, n))``.
        """
        print("voice_id", self.voice_id)
        async with connect(
            API_URL,
            additional_headers=headers,
        ) as conn:
            self.client = conn
            await conn.send(
                json.dumps(
                    {
                        "event_id": self.msg_id(),
                        "type": "session.update",
                        "session": {
                            "modalities": [
                                "text",
                                "audio",
                            ],
                            "voice": self.voice_id,
                            "input_audio_format": "pcm16",
                        },
                    }
                )
            )
            # Only expose the connection to receive() once the session is configured.
            self.connection = conn
            try:
                async for data in conn:
                    event = json.loads(data)
                    if not isinstance(event, dict) or "type" not in event:
                        continue
                    event_type = event["type"]
                    # Handle interruptions
                    if event_type == "input_audio_buffer.speech_started":
                        print("clear queue")
                        self.clear_queue()
                    elif event_type == "response.audio.delta":
                        print("putting output")
                        pcm = np.frombuffer(
                            base64.b64decode(event["delta"]), dtype=np.int16
                        ).reshape(1, -1)
                        await self.output_queue.put((self.output_sample_rate, pcm))
            finally:
                # The socket is closed (or closing) once the loop ends; drop the
                # references so receive() stops sending on a dead connection.
                self.connection = None
                self.client = None

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        """Forward one input audio frame to the remote input audio buffer.

        Frames that arrive while no connection is open are dropped.
        """
        conn = self.connection
        if not conn:
            return
        _, array = frame
        array = array.squeeze()
        audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
        await conn.send(
            json.dumps(
                {
                    "event_id": self.msg_id(),
                    "type": "input_audio_buffer.append",
                    "audio": audio_message,
                }
            )
        )

    async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
        """Return the next queued output item via ``wait_for_item``."""
        return await wait_for_item(self.output_queue)

    async def shutdown(self) -> None:
        """Close the WebSocket connection if one is open."""
        # Detach before awaiting so receive() cannot send while the close
        # handshake is in progress.
        conn, self.connection = self.connection, None
        if conn:
            await conn.close()
# Voice selector passed to the stream as an additional input.
# NOTE(review): QwenOmniHandler.start_up does not appear to read this input —
# confirm whether the selection is meant to reach the session.
voice = gr.Dropdown(
    label="Voice",
    choices=VOICES,
    value=VOICES[0],
    type="value",
)

# Bidirectional audio stream backed by a prototype QwenOmniHandler.
stream = Stream(
    QwenOmniHandler(),
    modality="audio",
    mode="send-receive",
    additional_inputs=[voice],
    additional_outputs=None,
    rtc_configuration=get_cloudflare_turn_credentials_async,
    time_limit=180,
    concurrency_limit=20,
)

# ASGI application that hosts both the stream routes and the custom routes below.
app = FastAPI()
@app.post("/telephone/incoming")
async def handle_incoming_call(request: Request):
    """
    Handle incoming telephone calls (e.g., via Twilio).

    Generates TwiML instructions to connect the incoming call to the
    WebSocket handler (`/telephone/handler`) for audio streaming. When the
    stream is already at capacity, the caller hears a busy message instead.

    Args:
        request: The FastAPI Request object for the incoming call webhook.

    Returns:
        An HTMLResponse containing the TwiML instructions as XML.
    """
    from twilio.twiml.voice_response import Connect, VoiceResponse

    response = VoiceResponse()

    # The incoming call is not counted in `stream.connections` yet, so it must
    # be rejected as soon as the limit is reached (`>=`), not only once the
    # limit has already been exceeded.
    if len(stream.connections) >= (stream.concurrency_limit or 20):
        response.say("Qwen is busy please try again later!")
        return HTMLResponse(content=str(response), media_type="application/xml")

    response.say("Connecting to Qwen")
    # Named `twiml_connect` to avoid shadowing the module-level websockets `connect`.
    twiml_connect = Connect()
    print("request.url.hostname", request.url.hostname)
    twiml_connect.stream(url=f"wss://{request.url.hostname}/telephone/handler")
    response.append(twiml_connect)
    # Spoken after the <Connect> verb finishes, i.e. once the stream has ended.
    response.say("The call has been disconnected.")
    return HTMLResponse(content=str(response), media_type="application/xml")
# Attach the stream's routes to the FastAPI app.
# NOTE(review): this runs after the custom `/telephone/incoming` route is
# registered; presumably the order is deliberate so the custom handler takes
# precedence, and `/telephone/handler` is presumably provided here — confirm.
stream.mount(app)
@app.get("/")
async def _():
    # Serve the static landing page that shows the number to call.
    return HTMLResponse(
        content="""
<!DOCTYPE html>
<html>
<head>
<title>Qwen Phone Chat</title>
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
line-height: 1.6;
}
pre {
background-color: #f5f5f5;
padding: 15px;
border-radius: 5px;
overflow-x: auto;
}
h1 {
color: #333;
}
</style>
</head>
<body>
<h1>Qwen Phone Chat</h1>
<p>Call +1 (877) 853-7936</p>
</body>
</html>
"""
    )
if __name__ == "__main__":
    # Imported lazily so uvicorn is only needed when running this file directly.
    from uvicorn import run as serve

    # Listen on all interfaces, port 7860.
    serve(app, port=7860, host="0.0.0.0")