freddyaboulton HF Staff committed on
Commit
0b1ae9e
·
verified ·
1 Parent(s): a54a773

Upload 3 files

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +206 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,6 +1,6 @@
1
  ---
2
  title: Qwen Phone Chat
3
- emoji: 📈
4
  colorFrom: pink
5
  colorTo: green
6
  sdk: gradio
 
1
  ---
2
  title: Qwen Phone Chat
3
+ emoji: 📞
4
  colorFrom: pink
5
  colorTo: green
6
  sdk: gradio
app.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import base64
3
+ import json
4
+ import os
5
+ import secrets
6
+ from pathlib import Path
7
+
8
+ import gradio as gr
9
+ import numpy as np
10
+ from dotenv import load_dotenv
11
+ from fastapi import FastAPI, Request
12
+ from fastapi.responses import HTMLResponse, StreamingResponse
13
+ from fastrtc import (
14
+ AdditionalOutputs,
15
+ AsyncStreamHandler,
16
+ Stream,
17
+ get_cloudflare_turn_credentials_async,
18
+ wait_for_item,
19
+ )
20
+ from gradio.utils import get_space
21
+ from websockets.asyncio.client import connect
22
+
23
# Load variables from a .env file into the process environment.
load_dotenv()

# Directory containing this script.
cur_dir = Path(__file__).parent

# Credentials and endpoint of the realtime websocket API.
API_KEY = os.environ.get("MODELSCOPE_API_KEY", "")
API_URL = "wss://dashscope.aliyuncs.com/api-ws/v1/realtime?model=qwen-omni-turbo-realtime-2025-03-26"

# Voice names offered in the "Voice" dropdown.
VOICES = ["Chelsie", "Serena", "Ethan", "Cherry"]

# Bearer-token header sent when opening the websocket.
headers = {"Authorization": f"Bearer {API_KEY}"}
31
+
32
+
33
class QwenOmniHandler(AsyncStreamHandler):
    """Relay audio between a fastrtc stream and the realtime websocket API.

    Frames passed to :meth:`receive` are base64-encoded and sent to the API as
    ``input_audio_buffer.append`` events. ``response.audio.delta`` events read
    in :meth:`start_up` are decoded to int16 arrays and queued for
    :meth:`emit`.
    """

    # Voice used when no (truthy) voice argument is available.
    DEFAULT_VOICE = "Serena"

    def __init__(self) -> None:
        """Configure mono audio, 16 kHz input and 24 kHz output."""
        super().__init__(
            expected_layout="mono",
            output_sample_rate=24_000,
            input_sample_rate=16_000,
        )
        # Open websocket to the API; ``None`` whenever no live socket exists.
        self.connection = None
        # Holds ``(sample_rate, int16 array of shape (1, n))`` tuples.
        self.output_queue = asyncio.Queue()

    def copy(self):
        """Return a fresh, unconnected handler."""
        return QwenOmniHandler()

    @staticmethod
    def msg_id() -> str:
        """Return a random event id of the form ``event_<20 hex chars>``."""
        return f"event_{secrets.token_hex(10)}"

    def _selected_voice(self) -> str:
        """Return the voice from ``latest_args[1]``, or the default voice.

        Falls back to ``DEFAULT_VOICE`` when ``latest_args`` is not a sequence
        with a second entry or when that entry is falsy.
        NOTE(review): confirm what fastrtc stores in ``latest_args`` for
        telephone connections, where no dropdown value is supplied.
        """
        args = getattr(self, "latest_args", None)
        if isinstance(args, (list, tuple)) and len(args) > 1 and args[1]:
            return args[1]
        return self.DEFAULT_VOICE

    async def start_up(self):
        """Connect to the realtime API and pump its events until it closes.

        Sends a ``session.update`` event selecting text+audio modalities, the
        chosen voice and ``pcm16`` input, then reads server events for as long
        as the websocket stays open.
        """
        await self.wait_for_args()
        voice_id = self._selected_voice()
        session_update = {
            "event_id": self.msg_id(),
            "type": "session.update",
            "session": {
                "modalities": ["text", "audio"],
                "voice": voice_id,
                "input_audio_format": "pcm16",
            },
        }
        async with connect(API_URL, additional_headers=headers) as conn:
            self.client = conn
            await conn.send(json.dumps(session_update))
            # Published only after the session is configured, so receive()
            # does not send audio before the session.update event.
            self.connection = conn
            try:
                async for data in conn:
                    event = json.loads(data)
                    if "type" not in event:
                        continue
                    event_type = event["type"]
                    # Handle interruptions
                    if event_type == "input_audio_buffer.speech_started":
                        self.clear_queue()
                    if event_type == "response.audio.delta":
                        samples = np.frombuffer(
                            base64.b64decode(event["delta"]), dtype=np.int16
                        ).reshape(1, -1)
                        await self.output_queue.put(
                            (self.output_sample_rate, samples)
                        )
            finally:
                # The socket is closed (or closing) once the loop ends; drop
                # the reference so receive() stops sending on it.
                if self.connection is conn:
                    self.connection = None

    async def receive(self, frame: tuple[int, np.ndarray]) -> None:
        """Forward one input audio frame to the API.

        Args:
            frame: ``(sample_rate, samples)``; only the samples are used.

        Frames are silently dropped while no connection is open.
        """
        if not self.connection:
            return
        _, array = frame
        array = array.squeeze()
        audio_message = base64.b64encode(array.tobytes()).decode("utf-8")
        await self.connection.send(
            json.dumps(
                {
                    "event_id": self.msg_id(),
                    "type": "input_audio_buffer.append",
                    "audio": audio_message,
                }
            )
        )

    async def emit(self) -> tuple[int, np.ndarray] | AdditionalOutputs | None:
        """Return the next queued output item via ``wait_for_item``."""
        return await wait_for_item(self.output_queue)

    async def shutdown(self) -> None:
        """Close the websocket, if one is open, and forget it."""
        if self.connection:
            await self.connection.close()
            self.connection = None
120
+
121
+
122
# Extra UI input; its value is the second entry of the handler's latest_args.
voice = gr.Dropdown(
    label="Voice",
    choices=VOICES,
    value=VOICES[0],
    type="value",
)

# Bidirectional audio stream backed by the Qwen handler.
stream = Stream(
    QwenOmniHandler(),
    modality="audio",
    mode="send-receive",
    additional_inputs=[voice],
    additional_outputs=None,
    rtc_configuration=get_cloudflare_turn_credentials_async,
    time_limit=180,
    concurrency_limit=20,
)

# ASGI application that hosts the routes defined in this file.
app = FastAPI()
135
+
136
@app.post("/telephone/incoming")
async def handle_incoming_call(request: Request):
    """
    Handle incoming telephone calls (e.g., via Twilio).

    Generates TwiML instructions to connect the incoming call to the
    WebSocket handler (`/telephone/handler`) for audio streaming. When the
    stream already holds as many connections as its concurrency limit, the
    caller hears a "busy" message instead.

    This is a module-level route, so it takes no `self`: FastAPI would treat
    such a parameter as a required query parameter and reject the webhook.

    Args:
        request: The FastAPI Request object for the incoming call webhook.

    Returns:
        An HTMLResponse containing the TwiML instructions as XML.
    """
    from twilio.twiml.voice_response import Connect, VoiceResponse

    response = VoiceResponse()

    # `>=`: the current call is not counted yet, so reaching the limit
    # already means there is no free slot.
    if len(stream.connections) >= (stream.concurrency_limit or 20):
        response.say("Qwen is busy please try again later!")
        return HTMLResponse(content=str(response), media_type="application/xml")

    response.say("Connecting to Qwen")
    # Named `twiml_connect` to avoid shadowing the websockets `connect` import.
    twiml_connect = Connect()
    twiml_connect.stream(url=f"wss://{request.url.hostname}/telephone/handler")
    response.append(twiml_connect)
    # Spoken once the <Connect> verb finishes, i.e. after the stream ends.
    response.say("The call has been disconnected.")
    return HTMLResponse(content=str(response), media_type="application/xml")


# Mounted after the route above is registered.
# NOTE(review): presumably mount() adds fastrtc's own routes, including the
# `/telephone/handler` websocket; confirm, and keep this order if so.
stream.mount(app)
166
+
167
+
168
@app.get("/")
async def _():
    """Serve the static landing page showing the phone number to call."""
    return HTMLResponse(
        content="""
    <!DOCTYPE html>
    <html>
    <head>
        <title>Qwen Phone Chat</title>
        <style>
            body {
                font-family: Arial, sans-serif;
                max-width: 800px;
                margin: 0 auto;
                padding: 20px;
                line-height: 1.6;
            }
            pre {
                background-color: #f5f5f5;
                padding: 15px;
                border-radius: 5px;
                overflow-x: auto;
            }
            h1 {
                color: #333;
            }
        </style>
    </head>
    <body>
        <h1>Qwen Phone Chat</h1>
        <p>Call +1 (877) 853-7936</p>
    </body>
    </html>
    """
    )
201
+
202
+
203
if __name__ == "__main__":
    # Imported here so uvicorn is only needed when run as a script.
    from uvicorn import run as serve

    # Listen on all interfaces, port 7860.
    serve(app, host="0.0.0.0", port=7860)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastrtc
2
+ websockets>=14.0
3
+ python-dotenv
4
+ twilio