Spaces:
Running
Running
Upload 3 files
Browse files
- README.md +1 -1
- app.py +206 -0
- requirements.txt +4 -0
README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
---
|
2 |
title: Qwen Phone Chat
|
3 |
-
emoji:
|
4 |
colorFrom: pink
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
|
|
1 |
---
|
2 |
title: Qwen Phone Chat
|
3 |
+
emoji: 📞
|
4 |
colorFrom: pink
|
5 |
colorTo: green
|
6 |
sdk: gradio
|
app.py
ADDED
@@ -0,0 +1,206 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import asyncio
|
2 |
+
import base64
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
import secrets
|
6 |
+
from pathlib import Path
|
7 |
+
|
8 |
+
import gradio as gr
|
9 |
+
import numpy as np
|
10 |
+
from dotenv import load_dotenv
|
11 |
+
from fastapi import FastAPI, Request
|
12 |
+
from fastapi.responses import HTMLResponse, StreamingResponse
|
13 |
+
from fastrtc import (
|
14 |
+
AdditionalOutputs,
|
15 |
+
AsyncStreamHandler,
|
16 |
+
Stream,
|
17 |
+
get_cloudflare_turn_credentials_async,
|
18 |
+
wait_for_item,
|
19 |
+
)
|
20 |
+
from gradio.utils import get_space
|
21 |
+
from websockets.asyncio.client import connect
|
22 |
+
|
23 |
+
# Load environment variables (python-dotenv) before any of them are read below.
load_dotenv()

# Directory that contains this module.
cur_dir = Path(__file__).parent

# Credential for the realtime endpoint; an unset variable yields an empty string.
API_KEY = os.environ.get("MODELSCOPE_API_KEY", "")
# WebSocket endpoint of the realtime model used by QwenOmniHandler.
API_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime?model=qwen-omni-turbo-realtime-2025-03-26"
# Voice names offered in the UI dropdown.
VOICES = ["Chelsie", "Serena", "Ethan", "Cherry"]
# HTTP headers sent with the WebSocket handshake.
headers = {"Authorization": f"Bearer {API_KEY}"}
|
31 |
+
|
32 |
+
|
33 |
+
class QwenOmniHandler(AsyncStreamHandler):
    """Relay audio between a FastRTC stream and the realtime WebSocket API.

    Incoming frames are base64-encoded and sent to the server as
    ``input_audio_buffer.append`` events; ``response.audio.delta`` events
    coming back are decoded to int16 arrays and queued for ``emit``.
    """

    def __init__(
        self,
    ) -> None:
        super().__init__(
            expected_layout="mono",
            output_sample_rate=24_000,
            input_sample_rate=16_000,
        )
        # Open WebSocket connection; stays None until start_up() has
        # sent the session configuration.
        self.connection = None
        # Decoded audio chunks waiting to be handed out by emit().
        self.output_queue = asyncio.Queue()

    def copy(self):
        """Return a new, unconnected handler instance."""
        return QwenOmniHandler()

    @staticmethod
    def msg_id() -> str:
        """Return a random event id of the form ``event_<20 hex chars>``."""
        return f"event_{secrets.token_hex(10)}"

    async def start_up(
        self,
    ):
        """Connect to realtime API. Run forever to keep connection open.

        Sends a ``session.update`` event with the selected voice, then
        consumes server events until the connection ends.
        """
        await self.wait_for_args()
        # The voice is expected in slot 1 of latest_args (the first
        # additional input). Guard the index so a shorter argument list
        # falls back to the default voice instead of raising IndexError.
        # NOTE(review): telephone calls presumably arrive without UI
        # inputs -- confirm against FastRTC's phone-mode behaviour.
        args = self.latest_args
        voice_id = (args[1] if len(args) > 1 else None) or "Serena"
        async with connect(
            API_URL,
            additional_headers=headers,
        ) as conn:
            # Kept for backward compatibility; not read elsewhere in this file.
            self.client = conn
            await conn.send(
                json.dumps(
                    {
                        "event_id": self.msg_id(),
                        "type": "session.update",
                        "session": {
                            "modalities": [
                                "text",
                                "audio",
                            ],
                            "voice": voice_id,
                            "input_audio_format": "pcm16",
                        },
                    }
                )
            )
            # Only expose the connection to receive() once the session
            # has been configured.
            self.connection = conn
            try:
                async for data in conn:
                    event = json.loads(data)
                    if "type" not in event:
                        continue
                    # Handle interruptions
                    if event["type"] == "input_audio_buffer.speech_started":
                        self.clear_queue()
                    if event["type"] == "response.audio.delta":
                        await self.output_queue.put(
                            (
                                self.output_sample_rate,
                                np.frombuffer(
                                    base64.b64decode(event["delta"]),
                                    dtype=np.int16,
                                ).reshape(1, -1),
                            ),
                        )
            finally:
                # The socket is closed (or closing) once the loop ends;
                # drop the reference so receive() stops sending on it.
                self.connection = None

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        """Forward one input audio frame to the server.

        Args:
            frame: ``(sample_rate, samples)``; only the samples are used.
                Frames are silently dropped while no connection is open.
        """
        if not self.connection:
            return
        _, array = frame
        array = array.squeeze()
        audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
        await self.connection.send(
            json.dumps(
                {
                    "event_id": self.msg_id(),
                    "type": "input_audio_buffer.append",
                    "audio": audio_message,
                }
            )
        )

    async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
        """Return the result of ``wait_for_item`` on the output queue."""
        return await wait_for_item(self.output_queue)

    async def shutdown(self) -> None:
        """Close the WebSocket connection, if any, and forget it."""
        if self.connection:
            await self.connection.close()
            self.connection = None
|
120 |
+
|
121 |
+
|
122 |
+
# Voice selector exposed as an extra input of the stream UI; defaults to
# the first entry of VOICES.
voice = gr.Dropdown(
    label="Voice",
    choices=VOICES,
    value=VOICES[0],
    type="value",
)

# Bidirectional audio stream backed by the Qwen handler.
stream = Stream(
    QwenOmniHandler(),
    modality="audio",
    mode="send-receive",
    additional_inputs=[voice],
    additional_outputs=None,
    concurrency_limit=20,
    time_limit=180,
    rtc_configuration=get_cloudflare_turn_credentials_async,
)

# FastAPI application that hosts both the custom routes and the stream.
app = FastAPI()
|
135 |
+
|
136 |
+
@app.post("/telephone/incoming")
async def handle_incoming_call(request: Request):
    """
    Handle incoming telephone calls (e.g., via Twilio).

    Generates TwiML instructions to connect the incoming call to the
    WebSocket handler (`/telephone/handler`) for audio streaming. When the
    number of active stream connections exceeds the concurrency limit, the
    caller hears a busy message instead.

    Args:
        request: The FastAPI Request object for the incoming call webhook.
            Its URL hostname is used to build the WebSocket URL.

    Returns:
        An HTMLResponse containing the TwiML instructions as XML.
    """
    # NOTE: this is a plain module-level route, so it must not declare a
    # `self` parameter -- FastAPI would treat it as a required query
    # parameter and reject webhook calls that do not supply it.
    from twilio.twiml.voice_response import Connect, VoiceResponse

    response = VoiceResponse()

    if len(stream.connections) > (stream.concurrency_limit or 20):
        response.say("Qwen is busy please try again later!")
        return HTMLResponse(content=str(response), media_type="application/xml")

    response.say("Connecting to Qwen")
    # Named to avoid shadowing the module-level websockets `connect`.
    twiml_connect = Connect()
    twiml_connect.stream(url=f"wss://{request.url.hostname}/telephone/handler")
    response.append(twiml_connect)
    # Spoken only after the <Connect> verb finishes, i.e. once the media
    # stream has ended.
    response.say("The call has been disconnected.")
    return HTMLResponse(content=str(response), media_type="application/xml")
|
164 |
+
|
165 |
+
# Attach the stream's own endpoints to the FastAPI app.
# NOTE(review): this runs after the custom /telephone/incoming route is
# registered, presumably so that route takes precedence -- confirm.
stream.mount(app)
|
166 |
+
|
167 |
+
|
168 |
+
@app.get("/")
async def _():
    """Serve the static landing page that shows the phone number to call."""
    page = """
<!DOCTYPE html>
<html>
<head>
<title>Qwen Phone Chat</title>
<style>
body {
font-family: Arial, sans-serif;
max-width: 800px;
margin: 0 auto;
padding: 20px;
line-height: 1.6;
}
pre {
background-color: #f5f5f5;
padding: 15px;
border-radius: 5px;
overflow-x: auto;
}
h1 {
color: #333;
}
</style>
</head>
<body>
<h1>Qwen Phone Chat</h1>
<p>Call +1 (877) 853-7936</p>
</body>
</html>
"""
    return HTMLResponse(content=page)
|
201 |
+
|
202 |
+
|
203 |
+
if __name__ == "__main__":
    # Imported lazily: only needed when the module is run as a script.
    from uvicorn import run as serve

    # Listen on all interfaces, port 7860.
    serve(app, host="0.0.0.0", port=7860)
|
requirements.txt
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
fastrtc
|
2 |
+
websockets>=14.0
|
3 |
+
python-dotenv
|
4 |
+
twilio
|