freddyaboulton HF Staff commited on
Commit
07e0298
·
verified ·
1 Parent(s): 86a3388

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +7 -4
  2. app.py +120 -0
  3. index.html +839 -0
  4. requirements.txt +3 -0
README.md CHANGED
@@ -1,12 +1,15 @@
1
  ---
2
- title: Talk To Llama4
3
- emoji: 💻
4
  colorFrom: purple
5
- colorTo: pink
6
  sdk: gradio
7
  sdk_version: 5.23.3
8
  app_file: app.py
9
  pinned: false
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Talk to Llama 4
3
+ emoji: 🦙
4
  colorFrom: purple
5
+ colorTo: red
6
  sdk: gradio
7
  sdk_version: 5.23.3
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ short_description: Talk to Llama 4 using Groq + Cloudflare
12
+ tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|GROQ_API_KEY]
13
  ---
14
 
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+
5
+ import gradio as gr
6
+ import numpy as np
7
+ from dotenv import load_dotenv
8
+ from fastapi import FastAPI
9
+ from fastapi.responses import HTMLResponse, StreamingResponse
10
+ from fastrtc import (
11
+ AdditionalOutputs,
12
+ ReplyOnPause,
13
+ Stream,
14
+ audio_to_bytes,
15
+ get_cloudflare_turn_credentials_async,
16
+ get_current_context,
17
+ get_tts_model,
18
+ )
19
+ from groq import Groq
20
+ from numpy.typing import NDArray
21
+
22
+ curr_dir = Path(__file__).parent
23
+ load_dotenv()
24
+
25
+ tts_model = get_tts_model()
26
+ groq = Groq(api_key=os.getenv("GROQ_API_KEY"))
27
+
28
+
29
+ conversations: dict[str, list[dict[str, str]]] = {}
30
+
31
+
32
def response(user_audio: tuple[int, NDArray[np.int16]]):
    """Handle one user turn: transcribe the audio, query Llama 4, stream TTS back.

    Args:
        user_audio: ``(sample_rate, samples)`` tuple delivered by the WebRTC
            stream when the caller pauses speaking.

    Yields:
        TTS audio chunks for the model's short answer, followed by an
        ``AdditionalOutputs`` carrying the full message history for the UI.
    """
    context = get_current_context()
    # Lazily create a per-connection conversation seeded with the system prompt.
    # Fixed vs. original: missing sentence space and a malformed JSON example
    # ({"short":, "long"}) that did not demonstrate valid JSON to the model.
    if context.webrtc_id not in conversations:
        conversations[context.webrtc_id] = [
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant that can answer questions and help with tasks. "
                    "Please return a short response (that will be converted to audio using a "
                    "text-to-speech model) and a long response to this question. They can be "
                    "the same if appropriate. Please return in JSON format:\n\n"
                    '{"short": "...", "long": "..."}\n\n'
                ),
            }
        ]
    messages = conversations[context.webrtc_id]

    # Speech-to-text via Groq's hosted distil-Whisper model.
    transcription = groq.audio.transcriptions.create(
        file=("audio.wav", audio_to_bytes(user_audio)),
        model="distil-whisper-large-v3-en",
        response_format="verbose_json",
    )
    print(transcription.text)

    messages.append({"role": "user", "content": transcription.text})

    completion = groq.chat.completions.create(  # type: ignore
        model="meta-llama/llama-4-scout-17b-16e-instruct",
        messages=messages,  # type: ignore
        temperature=1,
        max_completion_tokens=1024,
        top_p=1,
        stream=False,
        # JSON mode: the model is constrained to emit a single JSON object.
        response_format={"type": "json_object"},
        stop=None,
    )
    # `reply` instead of shadowing the function name `response`.
    reply = json.loads(completion.choices[0].message.content)
    # Tolerate a reply missing one of the keys instead of raising KeyError
    # mid-call and dropping the turn.
    short_response = reply.get("short") or reply.get("long", "")
    long_response = reply.get("long") or short_response
    messages.append({"role": "assistant", "content": long_response})
    conversations[context.webrtc_id] = messages
    # Speak the short answer; then push the updated history to the chatbot UI.
    yield from tts_model.stream_tts_sync(short_response)
    yield AdditionalOutputs(messages)
73
+
74
+
75
# Wire the turn-taking handler into a bidirectional FastRTC audio stream.
stream = Stream(
    ReplyOnPause(response),  # call `response` each time the speaker pauses
    modality="audio",
    mode="send-receive",
    # Extra non-audio output channel: chat history rendered as a Gradio chatbot.
    additional_outputs=[gr.Chatbot(type="messages")],
    additional_outputs_handler=lambda old, new: new,  # replace with latest history
    # Cloudflare TURN credentials so WebRTC traverses NATs/firewalls.
    rtc_configuration=get_cloudflare_turn_credentials_async,
)
83
+
84
# Expose the stream's WebRTC endpoints (e.g. /webrtc/offer) on a FastAPI app.
app = FastAPI()
stream.mount(app)
86
+
87
+
88
@app.get("/")
async def _():
    """Serve the landing page with fresh TURN credentials injected."""
    # Credentials are fetched per request because they are short-lived.
    rtc_config = await get_cloudflare_turn_credentials_async()
    page = (curr_dir / "index.html").read_text()
    page = page.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
    return HTMLResponse(content=page)
94
+
95
+
96
@app.get("/outputs")
async def _(webrtc_id: str):
    """Server-sent-events feed of chat-history updates for one connection."""

    async def output_stream():
        async for output in stream.output_stream(webrtc_id):
            history = output.args[0]
            # Only the newest user/assistant pair needs to be pushed.
            for msg in history[-2:]:
                payload = json.dumps({"message": msg})
                yield f"event: output\ndata: {payload}\n\n"

    return StreamingResponse(output_stream(), media_type="text/event-stream")
108
+
109
+
110
if __name__ == "__main__":
    # NOTE: the original re-imported `os` here; it is already imported at the
    # top of the module, so the duplicate import was removed.
    if (mode := os.getenv("MODE")) == "UI":
        # Launch FastRTC's built-in Gradio debug UI instead of the FastAPI app.
        stream.ui.launch(server_port=7860)
    elif mode == "PHONE":
        raise ValueError("Phone mode not supported")
    else:
        # Default: serve the FastAPI app (custom index.html + WebRTC endpoints).
        import uvicorn

        uvicorn.run(app, host="0.0.0.0", port=7860)
index.html ADDED
@@ -0,0 +1,839 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Talk to Llama 4</title>
8
+ <style>
9
+ :root {
10
+ --color-primary: #3b82f6;
11
+ --color-secondary: #f97316;
12
+ --color-background: #0f172a;
13
+ --color-surface: #1e293b;
14
+ --color-text: #f1f5f9;
15
+ --color-message-user: #334155;
16
+ --color-message-assistant: #1e40af;
17
+ --gradient-primary: linear-gradient(135deg, #3b82f6, #8b5cf6);
18
+ --gradient-secondary: linear-gradient(135deg, #f97316, #ec4899);
19
+ --boxSize: 8px;
20
+ --gutter: 4px;
21
+ }
22
+
23
+ * {
24
+ box-sizing: border-box;
25
+ margin: 0;
26
+ padding: 0;
27
+ }
28
+
29
+ body {
30
+ font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
31
+ background-color: var(--color-background);
32
+ color: var(--color-text);
33
+ min-height: 100vh;
34
+ display: flex;
35
+ flex-direction: column;
36
+ align-items: center;
37
+ padding: 2rem 1rem;
38
+ background-image:
39
+ radial-gradient(circle at 25% 25%, rgba(59, 130, 246, 0.1) 0%, transparent 50%),
40
+ radial-gradient(circle at 75% 75%, rgba(249, 115, 22, 0.1) 0%, transparent 50%);
41
+ }
42
+
43
+ .header-container {
44
+ display: flex;
45
+ align-items: center;
46
+ gap: 2rem;
47
+ margin-bottom: 2rem;
48
+ width: 100%;
49
+ max-width: 800px;
50
+ animation: fadeIn 1s ease-out;
51
+ }
52
+
53
+ .header {
54
+ text-align: left;
55
+ }
56
+
57
+ .header h1 {
58
+ font-size: 2.5rem;
59
+ margin-bottom: 0.5rem;
60
+ background: var(--gradient-primary);
61
+ -webkit-background-clip: text;
62
+ -webkit-text-fill-color: transparent;
63
+ font-weight: 800;
64
+ }
65
+
66
+ .header h2 {
67
+ font-size: 1.2rem;
68
+ font-weight: 400;
69
+ color: rgba(241, 245, 249, 0.8);
70
+ margin-bottom: 1rem;
71
+ }
72
+
73
+ .logo {
74
+ width: 120px;
75
+ height: 120px;
76
+ background: var(--color-surface);
77
+ border-radius: 50%;
78
+ display: flex;
79
+ align-items: center;
80
+ justify-content: center;
81
+ box-shadow: 0 15px 30px rgba(0, 0, 0, 0.3);
82
+ position: relative;
83
+ overflow: hidden;
84
+ animation: float 6s ease-in-out infinite;
85
+ flex-shrink: 0;
86
+ }
87
+
88
+ .logo::before {
89
+ content: "";
90
+ position: absolute;
91
+ width: 200%;
92
+ height: 200%;
93
+ background: var(--gradient-secondary);
94
+ opacity: 0.2;
95
+ animation: rotate 10s linear infinite;
96
+ }
97
+
98
+ .logo img {
99
+ width: 75%;
100
+ height: 75%;
101
+ object-fit: contain;
102
+ z-index: 2;
103
+ }
104
+
105
+ .container {
106
+ width: 100%;
107
+ max-width: 800px;
108
+ background-color: var(--color-surface);
109
+ border-radius: 1rem;
110
+ box-shadow: 0 25px 50px -12px rgba(0, 0, 0, 0.25);
111
+ overflow: hidden;
112
+ animation: slideUp 0.5s ease-out;
113
+ }
114
+
115
+ .chat-container {
116
+ height: 400px;
117
+ overflow-y: auto;
118
+ padding: 1.5rem;
119
+ display: flex;
120
+ flex-direction: column;
121
+ gap: 1rem;
122
+ scroll-behavior: smooth;
123
+ }
124
+
125
+ .message {
126
+ max-width: 80%;
127
+ padding: 1rem;
128
+ border-radius: 1rem;
129
+ line-height: 1.5;
130
+ animation: fadeIn 0.3s ease-out;
131
+ }
132
+
133
+ .message.user {
134
+ background-color: var(--color-message-user);
135
+ color: var(--color-text);
136
+ align-self: flex-end;
137
+ border-bottom-right-radius: 0.25rem;
138
+ }
139
+
140
+ .message.assistant {
141
+ background-color: var(--color-message-assistant);
142
+ color: var(--color-text);
143
+ align-self: flex-start;
144
+ border-bottom-left-radius: 0.25rem;
145
+ }
146
+
147
+ .wave-visualizer {
148
+ height: 100px;
149
+ padding: 1rem;
150
+ background-color: rgba(30, 41, 59, 0.8);
151
+ display: flex;
152
+ align-items: center;
153
+ justify-content: center;
154
+ position: relative;
155
+ overflow: hidden;
156
+ border-top: 1px solid rgba(255, 255, 255, 0.1);
157
+ }
158
+
159
+ .box-container {
160
+ display: flex;
161
+ justify-content: space-between;
162
+ align-items: center;
163
+ width: 100%;
164
+ height: 64px;
165
+ padding: 0 1rem;
166
+ }
167
+
168
+ .box {
169
+ height: 100%;
170
+ width: var(--boxSize);
171
+ background: var(--gradient-primary);
172
+ border-radius: 4px;
173
+ transform: scaleY(0.1);
174
+ transition: transform 0.05s ease;
175
+ }
176
+
177
+ .controls {
178
+ display: flex;
179
+ justify-content: center;
180
+ align-items: center;
181
+ padding: 1.5rem;
182
+ gap: 1rem;
183
+ border-top: 1px solid rgba(255, 255, 255, 0.1);
184
+ }
185
+
186
+ #start-button {
187
+ display: flex;
188
+ align-items: center;
189
+ justify-content: center;
190
+ background: var(--gradient-primary);
191
+ color: white;
192
+ border: none;
193
+ border-radius: 9999px;
194
+ padding: 0.75rem 1.5rem;
195
+ font-size: 1rem;
196
+ font-weight: 600;
197
+ cursor: pointer;
198
+ transition: all 0.3s ease;
199
+ box-shadow: 0 4px 14px rgba(59, 130, 246, 0.4);
200
+ }
201
+
202
+ #start-button:hover {
203
+ transform: translateY(-2px);
204
+ box-shadow: 0 6px 20px rgba(59, 130, 246, 0.6);
205
+ }
206
+
207
+ #start-button:active {
208
+ transform: translateY(1px);
209
+ }
210
+
211
+ .icon-with-spinner {
212
+ display: flex;
213
+ align-items: center;
214
+ justify-content: center;
215
+ gap: 12px;
216
+ min-width: 180px;
217
+ }
218
+
219
+ .spinner {
220
+ width: 20px;
221
+ height: 20px;
222
+ border: 2px solid white;
223
+ border-top-color: transparent;
224
+ border-radius: 50%;
225
+ animation: spin 1s linear infinite;
226
+ flex-shrink: 0;
227
+ }
228
+
229
+ .pulse-container {
230
+ display: flex;
231
+ align-items: center;
232
+ justify-content: center;
233
+ gap: 12px;
234
+ }
235
+
236
+ .pulse-circle {
237
+ width: 20px;
238
+ height: 20px;
239
+ border-radius: 50%;
240
+ background: var(--color-secondary);
241
+ opacity: 0.85;
242
+ flex-shrink: 0;
243
+ transform: scale(var(--audio-level, 1));
244
+ transition: transform 0.1s ease;
245
+ }
246
+
247
+ .mute-toggle {
248
+ width: 24px;
249
+ height: 24px;
250
+ cursor: pointer;
251
+ margin-left: 12px;
252
+ flex-shrink: 0;
253
+ filter: drop-shadow(0 4px 6px rgba(0, 0, 0, 0.2));
254
+ }
255
+
256
+ .mute-toggle svg {
257
+ width: 100%;
258
+ height: 100%;
259
+ stroke: white;
260
+ }
261
+
262
+ .typing-indicator {
263
+ padding: 0.5rem 1rem;
264
+ display: inline-flex;
265
+ align-items: center;
266
+ background-color: var(--color-message-assistant);
267
+ border-radius: 1rem;
268
+ align-self: flex-start;
269
+ margin-bottom: 0.5rem;
270
+ display: none;
271
+ animation: fadeIn 0.3s ease-out;
272
+ }
273
+
274
+ .dots {
275
+ display: inline-flex;
276
+ gap: 4px;
277
+ }
278
+
279
+ .dot {
280
+ width: 8px;
281
+ height: 8px;
282
+ background-color: white;
283
+ border-radius: 50%;
284
+ animation: bounce 1.5s infinite;
285
+ opacity: 0.7;
286
+ }
287
+
288
+ .dot:nth-child(2) {
289
+ animation-delay: 0.15s;
290
+ }
291
+
292
+ .dot:nth-child(3) {
293
+ animation-delay: 0.3s;
294
+ }
295
+
296
+ .toast {
297
+ position: fixed;
298
+ top: 20px;
299
+ left: 50%;
300
+ transform: translateX(-50%);
301
+ padding: 1rem 1.5rem;
302
+ border-radius: 0.5rem;
303
+ font-size: 0.875rem;
304
+ z-index: 1000;
305
+ display: none;
306
+ box-shadow: 0 10px 25px rgba(0, 0, 0, 0.3);
307
+ animation: slideDown 0.3s ease-out;
308
+ }
309
+
310
+ .toast.error {
311
+ background-color: #ef4444;
312
+ color: white;
313
+ }
314
+
315
+ .toast.warning {
316
+ background-color: #f59e0b;
317
+ color: black;
318
+ }
319
+
320
+ #audio-output {
321
+ display: none;
322
+ }
323
+
324
+ @keyframes float {
325
+
326
+ 0%,
327
+ 100% {
328
+ transform: translateY(0);
329
+ }
330
+
331
+ 50% {
332
+ transform: translateY(-10px);
333
+ }
334
+ }
335
+
336
+ @keyframes rotate {
337
+ 0% {
338
+ transform: rotate(0deg);
339
+ }
340
+
341
+ 100% {
342
+ transform: rotate(360deg);
343
+ }
344
+ }
345
+
346
+ @keyframes spin {
347
+ to {
348
+ transform: rotate(360deg);
349
+ }
350
+ }
351
+
352
+ @keyframes bounce {
353
+
354
+ 0%,
355
+ 100% {
356
+ transform: translateY(0);
357
+ }
358
+
359
+ 50% {
360
+ transform: translateY(-4px);
361
+ }
362
+ }
363
+
364
+ @keyframes fadeIn {
365
+ from {
366
+ opacity: 0;
367
+ }
368
+
369
+ to {
370
+ opacity: 1;
371
+ }
372
+ }
373
+
374
+ @keyframes slideUp {
375
+ from {
376
+ opacity: 0;
377
+ transform: translateY(20px);
378
+ }
379
+
380
+ to {
381
+ opacity: 1;
382
+ transform: translateY(0);
383
+ }
384
+ }
385
+
386
+ @keyframes slideDown {
387
+ from {
388
+ opacity: 0;
389
+ transform: translate(-50%, -20px);
390
+ }
391
+
392
+ to {
393
+ opacity: 1;
394
+ transform: translate(-50%, 0);
395
+ }
396
+ }
397
+ </style>
398
+ </head>
399
+
400
+ <body>
401
+ <div id="error-toast" class="toast"></div>
402
+
403
+ <div class="header-container">
404
+ <div class="logo">
405
+ <img src="https://huggingface.co/datasets/freddyaboulton/bucket/resolve/main/Video%26Audio%20huggy.png"
406
+ alt="LLaMA Logo">
407
+ </div>
408
+ <div class="header">
409
+ <h1>Talk to LLaMA 4</h1>
410
+ <h2>Experience seamless real-time conversation thanks to Cloudflare and Hugging Face's FastRTC.</h2>
411
+ </div>
412
+ </div>
413
+
414
+ <div class="container">
415
+ <div class="chat-container" id="chat-messages">
416
+ <!-- Messages will appear here -->
417
+ </div>
418
+
419
+ <div class="typing-indicator" id="typing-indicator">
420
+ <div class="dots">
421
+ <div class="dot"></div>
422
+ <div class="dot"></div>
423
+ <div class="dot"></div>
424
+ </div>
425
+ </div>
426
+
427
+ <div class="wave-visualizer">
428
+ <div class="box-container" id="box-container">
429
+ <!-- Boxes will be dynamically added here -->
430
+ </div>
431
+ </div>
432
+
433
+ <div class="controls">
434
+ <button id="start-button">Start Conversation</button>
435
+ </div>
436
+ </div>
437
+
438
+ <audio id="audio-output"></audio>
439
+
440
+ <script>
441
// --- WebRTC session state ---
let peerConnection;
let webrtc_id; // random id tying the offer, ICE candidates and SSE feed together
const startButton = document.getElementById('start-button');
const chatMessages = document.getElementById('chat-messages');
const boxContainer = document.getElementById('box-container');
const typingIndicator = document.getElementById('typing-indicator');
const audioOutput = document.getElementById('audio-output');

// --- Audio analysis state: separate contexts for mic input and remote output ---
let audioLevel = 0;
let animationFrame_input, animationFrame_output;
let audioContext_input, audioContext_output;
let analyser_input, dataArray_input;
let analyser_output, dataArray_output;
let audioSource_input, audioSource_output;
let messages = []; // chat history mirrored from the server's SSE feed
let eventSource;   // EventSource for /outputs
let isMuted = false;

// Create wave visualizer boxes
const numBars = 32;
for (let i = 0; i < numBars; i++) {
    const box = document.createElement('div');
    box.className = 'box';
    boxContainer.appendChild(box);
}

// SVG Icons swapped into the inline mute toggle (mic / mic with slash)
const micIconSVG = `
    <svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
        <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
        <path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
        <line x1="12" y1="19" x2="12" y2="23"></line>
        <line x1="8" y1="23" x2="16" y2="23"></line>
    </svg>`;

const micMutedIconSVG = `
    <svg xmlns="http://www.w3.org/2000/svg" width="100%" height="100%" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
        <path d="M12 1a3 3 0 0 0-3 3v8a3 3 0 0 0 6 0V4a3 3 0 0 0-3-3z"></path>
        <path d="M19 10v2a7 7 0 0 1-14 0v-2"></path>
        <line x1="12" y1="19" x2="12" y2="23"></line>
        <line x1="8" y1="23" x2="16" y2="23"></line>
        <line x1="1" y1="1" x2="23" y2="23"></line>
    </svg>`;
484
+
485
// Re-render the start/stop button to reflect the current connection state:
// spinner while connecting, pulse + mute toggle while connected, plain label otherwise.
function updateButtonState() {
    // Detach the previous mute handler before wiping innerHTML so it can't leak.
    const existingMuteButton = startButton.querySelector('.mute-toggle');
    if (existingMuteButton) {
        existingMuteButton.removeEventListener('click', toggleMute);
    }
    startButton.innerHTML = '';

    if (peerConnection && (peerConnection.connectionState === 'connecting' || peerConnection.connectionState === 'new')) {
        // Handshake in progress: disabled button with a spinner.
        startButton.innerHTML = `
            <div class="icon-with-spinner">
                <div class="spinner"></div>
                <span>Connecting...</span>
            </div>
        `;
        startButton.disabled = true;
    } else if (peerConnection && peerConnection.connectionState === 'connected') {
        // Live call: pulsing mic-level circle plus an inline mute toggle.
        const pulseContainer = document.createElement('div');
        pulseContainer.className = 'pulse-container';
        pulseContainer.innerHTML = `
            <div class="pulse-circle"></div>
            <span>Stop Conversation</span>
        `;

        const muteToggle = document.createElement('div');
        muteToggle.className = 'mute-toggle';
        muteToggle.title = isMuted ? 'Unmute' : 'Mute';
        muteToggle.innerHTML = isMuted ? micMutedIconSVG : micIconSVG;
        muteToggle.addEventListener('click', toggleMute);

        startButton.appendChild(pulseContainer);
        startButton.appendChild(muteToggle);
        startButton.disabled = false;

    } else {
        // Idle / disconnected.
        startButton.textContent = 'Start Conversation';
        startButton.disabled = false;
    }
}
523
+
524
// Enable/disable the local microphone track(s) without tearing down the call.
function toggleMute(event) {
    // Keep the click from also triggering the surrounding start/stop button.
    event.stopPropagation();
    if (!peerConnection || peerConnection.connectionState !== 'connected') return;

    isMuted = !isMuted;
    console.log("Mute toggled:", isMuted);

    for (const sender of peerConnection.getSenders()) {
        const track = sender.track;
        if (track && track.kind === 'audio') {
            track.enabled = !isMuted;
            console.log(`Audio track ${track.id} enabled: ${!isMuted}`);
        }
    }

    updateButtonState();
}
540
+
541
// Drive the pulse circle from the local microphone level.
function setupAudioVisualization(stream) {
    // Input audio context for pulse circle
    audioContext_input = new (window.AudioContext || window.webkitAudioContext)();
    analyser_input = audioContext_input.createAnalyser();
    audioSource_input = audioContext_input.createMediaStreamSource(stream);
    audioSource_input.connect(analyser_input);
    analyser_input.fftSize = 64; // coarse: only an average level is needed
    dataArray_input = new Uint8Array(analyser_input.frequencyBinCount);

    function updateAudioLevel() {
        // Average the frequency bins into a single 0..1 level.
        analyser_input.getByteFrequencyData(dataArray_input);
        const average = Array.from(dataArray_input).reduce((a, b) => a + b, 0) / dataArray_input.length;
        audioLevel = average / 255;

        // Scale the pulse circle via a CSS custom property (see .pulse-circle rule).
        const pulseCircle = document.querySelector('.pulse-circle');
        if (pulseCircle) {
            pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
        }

        // Re-schedule; cancelled in stop() via animationFrame_input.
        animationFrame_input = requestAnimationFrame(updateAudioLevel);
    }

    updateAudioLevel();
}
566
+
567
// Drive the wave bars from the remote (server TTS) audio stream.
function setupOutputVisualization(stream) {
    // Create separate audio context for output visualization
    audioContext_output = new (window.AudioContext || window.webkitAudioContext)();
    analyser_output = audioContext_output.createAnalyser();
    audioSource_output = audioContext_output.createMediaStreamSource(stream);
    audioSource_output.connect(analyser_output);
    analyser_output.fftSize = 2048; // fine-grained: bars sample across the spectrum
    dataArray_output = new Uint8Array(analyser_output.frequencyBinCount);

    function updateVisualization() {
        analyser_output.getByteFrequencyData(dataArray_output);

        // Map each bar to an evenly spaced frequency bin and scale its height.
        const boxes = document.querySelectorAll('.box');
        for (let i = 0; i < boxes.length; i++) {
            const index = Math.floor(i * dataArray_output.length / boxes.length);
            const value = dataArray_output[index] / 255;
            boxes[i].style.transform = `scaleY(${Math.max(0.1, value * 1.5)})`;
        }

        // Re-schedule; cancelled in stop() via animationFrame_output.
        animationFrame_output = requestAnimationFrame(updateVisualization);
    }

    updateVisualization();
}
592
+
593
// Shrink every visualizer bar back to its resting (minimum) height.
function resetVisualization() {
    for (const bar of document.querySelectorAll('.box')) {
        bar.style.transform = 'scaleY(0.1)';
    }
}
598
+
599
// Flash a red error toast with `message`, auto-hiding after five seconds.
function showError(message) {
    const toast = document.getElementById('error-toast');
    toast.textContent = message;
    toast.className = 'toast error';
    toast.style.display = 'block';
    setTimeout(() => {
        toast.style.display = 'none';
    }, 5000);
}
609
+
610
// Handle JSON control messages arriving on the WebRTC data channel.
function handleMessage(event) {
    const eventJson = JSON.parse(event.data);

    if (eventJson.type === "error") {
        showError(eventJson.message);
    } else if (eventJson.type === "send_input") {
        // Server asked for current UI state; echo the chat history back.
        // NOTE(review): /input_hook is not defined in app.py as shown —
        // presumably mounted by stream.mount(); confirm against fastrtc docs.
        fetch('/input_hook', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                webrtc_id: webrtc_id,
                chatbot: messages,
                state: messages
            })
        });
    } else if (eventJson.type === "log") {
        // Turn-taking hints: show typing dots while the model is thinking.
        if (eventJson.data === "pause_detected") {
            typingIndicator.style.display = 'block';
            chatMessages.scrollTop = chatMessages.scrollHeight;
        } else if (eventJson.data === "response_starting") {
            typingIndicator.style.display = 'none';
        }
    }
}
636
+
637
// Establish the WebRTC call: mic capture -> offer/answer over /webrtc/offer,
// plus an SSE subscription (/outputs) for chat-history updates.
async function setupWebRTC() {
    // __RTC_CONFIGURATION__ is substituted server-side (app.py "/" route)
    // with Cloudflare TURN credentials.
    const config = __RTC_CONFIGURATION__;
    peerConnection = new RTCPeerConnection(config);

    // Warn (don't fail) if the handshake is slow — often a VPN/firewall issue.
    const timeoutId = setTimeout(() => {
        const toast = document.getElementById('error-toast');
        toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
        toast.className = 'toast warning';
        toast.style.display = 'block';

        setTimeout(() => {
            toast.style.display = 'none';
        }, 5000);
    }, 5000);

    try {
        const stream = await navigator.mediaDevices.getUserMedia({
            audio: true
        });

        setupAudioVisualization(stream);

        stream.getTracks().forEach(track => {
            peerConnection.addTrack(track, stream);
        });

        // Add this listener to handle incoming audio track
        peerConnection.addEventListener('track', (event) => {
            if (event.track.kind === 'audio') {
                console.log("Received audio track from server");

                if (audioOutput) {
                    audioOutput.srcObject = event.streams[0];
                    audioOutput.play().catch(e => console.error("Audio play failed:", e));
                }

                // Set up visualization for output audio with separate context
                setupOutputVisualization(event.streams[0]);
            }
        });

        // Data channel carries JSON control messages (errors, logs, input requests).
        const dataChannel = peerConnection.createDataChannel('text');
        dataChannel.onmessage = handleMessage;

        const offer = await peerConnection.createOffer();
        await peerConnection.setLocalDescription(offer);

        // Trickle ICE: forward each local candidate to the server as it appears.
        peerConnection.onicecandidate = ({ candidate }) => {
            if (candidate) {
                console.debug("Sending ICE candidate", candidate);
                fetch('/webrtc/offer', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({
                        candidate: candidate.toJSON(),
                        webrtc_id: webrtc_id,
                        type: "ice-candidate",
                    })
                })
            }
        };

        peerConnection.addEventListener('connectionstatechange', () => {
            console.log('connectionstatechange', peerConnection.connectionState);
            if (peerConnection.connectionState === 'connected') {
                clearTimeout(timeoutId);
                const toast = document.getElementById('error-toast');
                toast.style.display = 'none';
            } else if (['closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
                // Any terminal state tears everything down.
                stop();
            }
            updateButtonState();
        });

        // Random id ties the SDP offer, ICE candidates and the SSE feed together.
        webrtc_id = Math.random().toString(36).substring(7);

        const response = await fetch('/webrtc/offer', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                sdp: peerConnection.localDescription.sdp,
                type: peerConnection.localDescription.type,
                webrtc_id: webrtc_id
            })
        });

        const serverResponse = await response.json();

        if (serverResponse.status === 'failed') {
            showError(serverResponse.meta.error === 'concurrency_limit_reached'
                ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
                : serverResponse.meta.error);
            stop();
            return;
        }

        await peerConnection.setRemoteDescription(serverResponse);

        // Subscribe to chat-history updates pushed by the server (app.py /outputs).
        eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
        eventSource.addEventListener("output", (event) => {
            const eventJson = JSON.parse(event.data);
            console.log(eventJson);
            messages.push(eventJson.message);
            // NOTE(review): eventJson.audio is never set by the /outputs handler
            // shown in app.py — the ?? fallback always uses message.content.
            addMessage(eventJson.message.role, eventJson.audio ?? eventJson.message.content);
        })
    } catch (err) {
        clearTimeout(timeoutId);
        console.error('Error setting up WebRTC:', err);
        showError('Failed to establish connection. Please try again.');
        stop();
    }
}
749
+
750
// Append a chat bubble for `role` ('user' or 'assistant') and keep the
// conversation scrolled to the newest message.
function addMessage(role, content) {
    const bubble = document.createElement('div');
    bubble.classList.add('message', role);
    bubble.textContent = content;
    chatMessages.appendChild(bubble);
    chatMessages.scrollTop = chatMessages.scrollHeight;
}
757
+
758
// Tear down the whole session: SSE feed, animation loops, audio contexts,
// output element, and finally the peer connection. Safe to call repeatedly.
function stop() {
    if (eventSource) {
        eventSource.close();
        eventSource = null;
    }

    if (animationFrame_input) {
        cancelAnimationFrame(animationFrame_input);
        animationFrame_input = null;
    }

    if (animationFrame_output) {
        cancelAnimationFrame(animationFrame_output);
        animationFrame_output = null;
    }

    if (audioContext_input) {
        audioContext_input.close().catch(e => console.error("Error closing input AudioContext:", e));
        audioContext_input = null;
        analyser_input = null;
        dataArray_input = null;
        audioSource_input = null;
    }

    if (audioContext_output) {
        audioContext_output.close().catch(e => console.error("Error closing output AudioContext:", e));
        audioContext_output = null;
        analyser_output = null;
        dataArray_output = null;
        audioSource_output = null;
    }

    if (audioOutput) {
        audioOutput.pause();
        audioOutput.srcObject = null;
    }

    // Reset visualization
    resetVisualization();

    if (peerConnection) {
        // Stop transceivers where the browser supports it, then close.
        if (peerConnection.getTransceivers) {
            peerConnection.getTransceivers().forEach(transceiver => {
                if (transceiver.stop) {
                    transceiver.stop();
                }
            });
        }

        // NOTE(review): 'connectionstatechange' was attached with
        // addEventListener, so nulling onconnectionstatechange does not
        // detach it — harmless here since the connection is closed next.
        peerConnection.onicecandidate = null;
        peerConnection.ondatachannel = null;
        peerConnection.onconnectionstatechange = null;

        peerConnection.close();
        peerConnection = null;
    }

    isMuted = false;
    updateButtonState();
    audioLevel = 0;
}
819
+
820
// Single click handler toggles the call; clicks on the embedded mute icon are
// handled by toggleMute and ignored here.
startButton.addEventListener('click', (event) => {
    if (event.target.closest('.mute-toggle')) {
        return;
    }

    if (peerConnection && peerConnection.connectionState === 'connected') {
        console.log("Stop button clicked");
        stop();
    } else if (!peerConnection || ['new', 'closed', 'failed', 'disconnected'].includes(peerConnection.connectionState)) {
        console.log("Start button clicked");
        // Fresh session: clear the mirrored history and the rendered chat.
        messages = [];
        chatMessages.innerHTML = '';
        setupWebRTC();
        updateButtonState();
    }
});
836
+ </script>
837
+ </body>
838
+
839
+ </html>
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ fastrtc[vad]==0.0.20.rc2
2
+ groq
3
+ python-dotenv