# Source: Hugging Face Space (status: Running). File size: 6,195 bytes.
# Blame commits: 0b1ae9e c230aed 84665a2 18dbd32 9076931
import asyncio
import base64
import json
import os
import secrets
from pathlib import Path
import gradio as gr
import numpy as np
from dotenv import load_dotenv
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse, StreamingResponse
from fastrtc import (
AdditionalOutputs,
AsyncStreamHandler,
Stream,
get_cloudflare_turn_credentials_async,
wait_for_item,
)
from gradio.utils import get_space
from websockets.asyncio.client import connect
# Load environment variables (presumably from a local .env file) before the
# API key lookup below, so a key defined there is visible to os.environ.
load_dotenv()

# Directory containing this file.
cur_dir = Path(__file__).parent

# Credential sent to the realtime endpoint; empty string when the variable is unset.
API_KEY = os.environ.get("MODELSCOPE_API_KEY", "")

# WebSocket endpoint of the realtime API, with the model selected via query string.
API_URL = (
    "wss://dashscope.aliyuncs.com/api-ws/v1/realtime"
    "?model=qwen-omni-turbo-realtime-2025-03-26"
)

# Voice names offered in the UI dropdown.
VOICES = ["Chelsie", "Serena", "Ethan", "Cherry"]

# Extra HTTP headers passed when opening the WebSocket connection.
headers = {"Authorization": f"Bearer {API_KEY}"}
class QwenOmniHandler(AsyncStreamHandler):
    """Relay audio between a fastrtc stream and the realtime WebSocket API.

    Frames passed to :meth:`receive` are base64-encoded and sent upstream as
    ``input_audio_buffer.append`` events. ``response.audio.delta`` events
    coming back are decoded to int16 samples and queued so :meth:`emit` can
    hand them to the stream.
    """

    def __init__(
        self,
    ) -> None:
        """Configure mono audio at 16 kHz in / 24 kHz out and create the output queue."""
        super().__init__(
            expected_layout="mono",
            output_sample_rate=24_000,
            input_sample_rate=16_000,
        )
        # Live WebSocket connection; None whenever no session is open.
        self.connection = None
        # Decoded (sample_rate, samples) tuples waiting to be emitted.
        self.output_queue = asyncio.Queue()

    def copy(self):
        """Return a fresh, unconnected handler instance."""
        return QwenOmniHandler()

    @staticmethod
    def msg_id() -> str:
        """Return a random event id of the form ``event_<20 hex chars>``."""
        return f"event_{secrets.token_hex(10)}"

    async def start_up(
        self,
    ):
        """Connect to the realtime API and pump server events until it closes.

        Sends a ``session.update`` event selecting the voice and audio format,
        then consumes server events for as long as the connection stays open.
        ``self.connection`` is cleared when the loop ends (for any reason) so
        that :meth:`receive` stops sending on a closed socket.
        """
        voice_id = "Serena"
        print("voice_id", voice_id)
        async with connect(
            API_URL,
            additional_headers=headers,
        ) as conn:
            # Kept as an alias of the connection for backward compatibility.
            self.client = conn
            await conn.send(
                json.dumps(
                    {
                        "event_id": self.msg_id(),
                        "type": "session.update",
                        "session": {
                            "modalities": [
                                "text",
                                "audio",
                            ],
                            "voice": voice_id,
                            "input_audio_format": "pcm16",
                        },
                    }
                )
            )
            # Publish the connection only after the session has been configured,
            # so receive() never sends audio ahead of session.update.
            self.connection = conn
            try:
                async for data in conn:
                    event = json.loads(data)
                    event_type = event.get("type")
                    if event_type is None:
                        continue
                    # Handle interruptions
                    if event_type == "input_audio_buffer.speech_started":
                        print("clear queue")
                        self.clear_queue()
                    if event_type == "response.audio.delta":
                        print("putting output")
                        samples = np.frombuffer(
                            base64.b64decode(event["delta"]), dtype=np.int16
                        ).reshape(1, -1)
                        await self.output_queue.put(
                            (self.output_sample_rate, samples),
                        )
            finally:
                # The socket is closed (or closing) once we get here; drop the
                # stale reference unless something else already replaced it.
                if self.connection is conn:
                    self.connection = None

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        """Forward one input audio frame to the API; no-op while disconnected.

        Args:
            frame: ``(sample_rate, samples)`` pair; only the samples are used.
        """
        # Snapshot the attribute so a concurrent shutdown cannot turn it into
        # None between the check and the send.
        conn = self.connection
        if conn is None:
            return
        _, array = frame
        array = array.squeeze()
        audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
        await conn.send(
            json.dumps(
                {
                    "event_id": self.msg_id(),
                    "type": "input_audio_buffer.append",
                    "audio": audio_message,
                }
            )
        )

    async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
        """Return the next queued output item via ``wait_for_item``."""
        return await wait_for_item(self.output_queue)

    async def shutdown(self) -> None:
        """Close the WebSocket connection, if any, and forget it."""
        # Clear the attribute first so it is reset even if close() raises and
        # so receive() stops sending immediately.
        conn, self.connection = self.connection, None
        if conn is not None:
            await conn.close()
# Voice selector exposed as an additional stream input; defaults to the first
# entry of VOICES.
# NOTE(review): QwenOmniHandler.start_up uses a fixed voice and does not read
# this input — confirm whether that is intended.
voice = gr.Dropdown(label="Voice", choices=VOICES, value=VOICES[0], type="value")

# Bidirectional audio stream backed by the Qwen handler.
stream = Stream(
    QwenOmniHandler(),
    modality="audio",
    mode="send-receive",
    additional_inputs=[voice],
    additional_outputs=None,
    rtc_configuration=get_cloudflare_turn_credentials_async,
    time_limit=180,
    concurrency_limit=20,
)

# FastAPI application that hosts the telephone webhook and the landing page.
app = FastAPI()
@app.post("/telephone/incoming")
async def handle_incoming_call(request: Request):
    """
    Handle incoming telephone calls (e.g., via Twilio).

    Generates TwiML instructions to connect the incoming call to the
    WebSocket handler (`/telephone/handler`) for audio streaming. When the
    stream is already at its concurrency limit, the caller hears a busy
    message instead and no stream is opened.

    Args:
        request: The FastAPI Request object for the incoming call webhook.

    Returns:
        An HTMLResponse containing the TwiML instructions as XML.
    """
    from twilio.twiml.voice_response import Connect, VoiceResponse

    response = VoiceResponse()

    # `>=`, not `>`: with exactly `limit` active connections the stream is
    # already full, so one more caller must be turned away.
    if len(stream.connections) >= (stream.concurrency_limit or 20):
        response.say("Qwen is busy please try again later!")
        return HTMLResponse(content=str(response), media_type="application/xml")

    response.say("Connecting to Qwen")
    # Named `twiml_connect` to avoid shadowing the websockets `connect`
    # imported at module level.
    twiml_connect = Connect()
    print("request.url.hostname", request.url.hostname)
    twiml_connect.stream(url=f"wss://{request.url.hostname}/telephone/handler")
    response.append(twiml_connect)
    # Placed after the <Connect> verb in the TwiML, so it is presumably spoken
    # once the media stream ends.
    response.say("The call has been disconnected.")
    return HTMLResponse(content=str(response), media_type="application/xml")
# Attach the stream's own routes to the FastAPI app. This runs after the
# custom /telephone/incoming route above has been registered.
# NOTE(review): presumably this is what provides the /telephone/handler
# WebSocket endpoint referenced in the TwiML — confirm against fastrtc.
stream.mount(app)
@app.get("/")
async def _():
    """Serve the static landing page showing the phone number to call."""
    return HTMLResponse(
        content="""
<!DOCTYPE html>
<html>
<head>
<title>Qwen Phone Chat</title>
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
line-height: 1.6;
}
pre {
background-color: #f5f5f5;
padding: 15px;
border-radius: 5px;
overflow-x: auto;
}
h1 {
color: #333;
}
</style>
</head>
<body>
<h1>Qwen Phone Chat</h1>
<p>Call +1 (877) 853-7936</p>
</body>
</html>
"""
    )
if __name__ == "__main__":
    # Script entry point: serve the FastAPI app on all interfaces, port 7860.
    # uvicorn is imported here so it is only required when run as a script.
    import uvicorn

    uvicorn.run(
        app,
        port=7860,
        host="0.0.0.0",
    )
|