freddyaboulton HF staff committed on
Commit
af5779f
·
verified ·
1 Parent(s): f3ecfe9

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ chiahuahua-long.mp3 filter=lfs diff=lfs merge=lfs -text
37
+ dachshund-long.mp3 filter=lfs diff=lfs merge=lfs -text
38
+ golden-retriever-long.mp3 filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,12 +1,15 @@
1
  ---
2
- title: Talk To Your Dog
3
- emoji: 🌍
4
- colorFrom: red
5
- colorTo: indigo
6
  sdk: gradio
7
  sdk_version: 5.23.2
8
  app_file: app.py
9
  pinned: false
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: PuppyChat
3
+ emoji: 🐶
4
+ colorFrom: purple
5
+ colorTo: red
6
  sdk: gradio
7
  sdk_version: 5.23.2
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ short_description: Talk to your dog!
12
+ tags: [webrtc, websocket, gradio, secret|TURN_TOKEN_ID, secret|TURN_API_TOKEN]
13
  ---
14
 
15
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+ from typing import Literal
5
+ import gradio as gr
6
+ import numpy as np
7
+ from dotenv import load_dotenv
8
+ from fastapi import FastAPI, HTTPException
9
+ from fastapi.responses import HTMLResponse, StreamingResponse, FileResponse
10
+ from fastrtc import (
11
+ AdditionalOutputs,
12
+ ReplyOnPause,
13
+ Stream,
14
+ get_stt_model,
15
+ get_twilio_turn_credentials,
16
+ )
17
+ import soundfile as sf
18
+ from gradio.utils import get_space
19
+ from pydantic import BaseModel
20
+ import httpx
21
+
22
load_dotenv()

# Directory containing this script; the audio clips and index.html are read from it.
curr_dir = Path(__file__).parent

stt_model = get_stt_model()

conversations = {}

# Names of the bundled clips, one "<breed>-<length>" entry per .mp3 file.
_BARK_NAMES = (
    "chiahuahua-short",
    "chiahuahua-long",
    "dachshund-short",
    "dachshund-long",
    "golden-retriever-short",
    "golden-retriever-long",
)


def _load_bark(name):
    """Read ``<name>.mp3`` and return ``(sample_rate, float32 samples)``."""
    samples, sample_rate = sf.read(curr_dir / f"{name}.mp3")
    # A second axis with more than one entry is treated as channels and
    # averaged down to mono; anything else is used as-is.
    if samples.ndim > 1 and samples.shape[1] > 1:
        samples = np.mean(samples, axis=1)
    return (sample_rate, samples.astype(np.float32))


# Clip name -> (sample_rate, samples), loaded once at import time.
barks = {name: _load_bark(name) for name in _BARK_NAMES}
46
+
47
+
48
def response(
    audio: tuple[int, np.ndarray],
    breed: Literal["chiahuahua", "dachshund", "golden-retriever"],
):
    """Answer one spoken utterance with a pre-recorded bark.

    The utterance is transcribed with ``stt_model``; a transcript of more than
    ten words gets the breed's "long" bark, anything else the "short" one.

    Args:
        audio: The captured utterance, passed unchanged to ``stt_model.stt``.
        breed: Which dog answers; combined with the bark length to form the
            key into ``barks``.

    Yields:
        A single ``(bark, AdditionalOutputs(messages))`` pair. ``bark`` is the
        ``barks`` entry to play back; ``messages`` holds the user transcript
        followed by an assistant message whose content is the
        ``/files/<name>.mp3`` URL of that same bark.

    Raises:
        KeyError: If ``breed`` does not match one of the loaded barks.
    """
    prompt = stt_model.stt(audio)
    # split() with no argument ignores leading, trailing and repeated
    # whitespace, so padding in the transcript cannot inflate the word count
    # (split(" ") would count the empty strings between spaces).
    length = "long" if len(prompt.split()) > 10 else "short"
    file_name = f"{breed}-{length}"
    messages = [
        {"role": "user", "content": prompt},
        {"role": "assistant", "content": f"/files/{file_name}.mp3"},
    ]
    yield barks[file_name], AdditionalOutputs(messages)
65
+
66
+
67
# Looked up once; the three deployment-specific options below depend on it.
_space = get_space()

stream = Stream(
    handler=ReplyOnPause(response),
    modality="audio",
    mode="send-receive",
    # The breed picker is the extra input handed to `response` as `breed`.
    additional_inputs=[
        gr.Dropdown(choices=["chiahuahua", "dachshund", "golden-retriever"])
    ],
    additional_outputs=[gr.JSON()],
    # Returns its second argument unchanged.
    additional_outputs_handler=lambda a, b: b,
    # TURN credentials and usage limits are only set when get_space() is truthy.
    rtc_configuration=get_twilio_turn_credentials() if _space else None,
    concurrency_limit=5 if _space else None,
    time_limit=90 if _space else None,
)
80
+
81
+
82
class InputData(BaseModel):
    # Request body accepted by POST /input_hook.

    # Identifier of the WebRTC connection whose input is being set.
    webrtc_id: str
    # Breed forwarded to the stream as that connection's additional input.
    breed: Literal["chiahuahua", "dachshund", "golden-retriever"]
85
+
86
+
87
# FastAPI application that hosts the custom front end; the stream is mounted
# onto it (presumably this provides the /webrtc/offer route index.html posts to
# -- confirm against fastrtc).
app = FastAPI()
stream.mount(app)

# Shared async HTTP client, used by the index route to request TURN credentials.
client = httpx.AsyncClient()
90
+
91
+
92
@app.get("/")
async def _():
    """Serve ``index.html`` with freshly generated TURN credentials embedded.

    Requests ICE-server credentials from Cloudflare using the
    ``TURN_TOKEN_ID`` / ``TURN_API_TOKEN`` environment variables, then
    substitutes the JSON result for the ``__RTC_CONFIGURATION__`` placeholder
    in the page.

    Returns:
        An ``HTMLResponse`` (status 200) containing the rendered page.

    Raises:
        RuntimeError: If either environment variable is unset or empty, or if
            Cloudflare does not answer with a success status.
    """
    turn_key_id = os.environ.get("TURN_TOKEN_ID")
    turn_key_api_token = os.environ.get("TURN_API_TOKEN")
    # Fail with a clear message instead of sending a request for key "None".
    if not turn_key_id or not turn_key_api_token:
        raise RuntimeError(
            "TURN_TOKEN_ID and TURN_API_TOKEN must be set to generate TURN credentials"
        )
    ttl = 600

    # Named turn_response so it does not shadow the module-level `response` handler.
    turn_response = await client.post(
        f"https://rtc.live.cloudflare.com/v1/turn/keys/{turn_key_id}/credentials/generate-ice-servers",
        headers={
            "Authorization": f"Bearer {turn_key_api_token}",
            "Content-Type": "application/json",
        },
        json={"ttl": ttl},
    )
    if not turn_response.is_success:
        raise RuntimeError(
            f"Failed to get TURN credentials: {turn_response.status_code} {turn_response.text}"
        )
    rtc_config = turn_response.json()

    # index.html declares UTF-8 and contains emoji; do not rely on the locale default.
    html_content = (curr_dir / "index.html").read_text(encoding="utf-8")
    html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
    return HTMLResponse(content=html_content, status_code=200)
115
+
116
+
117
@app.get("/files/{file_name}")
async def _(file_name: str):
    """Serve one of the bundled bark recordings.

    Only names of the form ``<key>.mp3``, where ``<key>`` is a key of
    ``barks``, are served.

    Args:
        file_name: Requested file name taken from the URL path.

    Returns:
        A ``FileResponse`` for the matching ``.mp3`` next to this script.

    Raises:
        HTTPException: 404 when the name is not a known bark recording.
    """
    print("file_name", file_name)
    requested = Path(file_name)
    # Require the .mp3 suffix and match the stem exactly, rather than removing
    # every ".mp3" substring, so names such as "chiahuahua-long" or
    # "chiahuahua.mp3-long" are rejected instead of failing on a missing file.
    if requested.suffix != ".mp3" or requested.stem not in barks:
        raise HTTPException(status_code=404, detail="File not found")
    # Build the path from the validated stem so that exactly the file that was
    # checked is the one served, always from curr_dir.
    return FileResponse(curr_dir / f"{requested.stem}.mp3")
123
+
124
+
125
@app.post("/input_hook")
async def _(body: InputData):
    """Record the breed selected for a WebRTC connection and acknowledge."""
    webrtc_id, breed = body.webrtc_id, body.breed
    stream.set_input(webrtc_id, breed)
    return dict(status="ok")
129
+
130
+
131
@app.get("/outputs")
def _(webrtc_id: str):
    """Stream a connection's additional outputs as server-sent events.

    Each item of ``stream.output_stream(webrtc_id)`` carries a list of
    messages in ``args[0]``; every message is sent as its own ``output`` event
    with a JSON-encoded payload.
    """

    async def sse_events():
        async for output in stream.output_stream(webrtc_id):
            for message in output.args[0]:
                payload = json.dumps(message)
                yield f"event: output\ndata: {payload}\n\n"

    return StreamingResponse(sse_events(), media_type="text/event-stream")
140
+
141
+
142
if __name__ == "__main__":
    # MODE picks the front end: "UI" launches the stream's built-in UI,
    # "PHONE" calls stream.fastphone, anything else serves the FastAPI app.
    mode = os.getenv("MODE")

    if mode == "UI":
        stream.ui.launch(server_port=7860)
    elif mode == "PHONE":
        stream.fastphone(host="0.0.0.0", port=7860)
    else:
        import uvicorn

        # Bind to all interfaces only when get_space() is truthy; otherwise stay on loopback.
        host = "0.0.0.0" if get_space() else "127.0.0.1"
        uvicorn.run(app, host=host, port=7860)
chiahuahua-long.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e74a6f0da8dcc2882b3c12bde17436231fc727128925b745d5bef6d41918a5a
3
+ size 131711
chiahuahua-short.mp3 ADDED
Binary file (40.3 kB). View file
 
dachshund-long.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d412dd964d6d5590849b91782623ff014a527918b091366145cbc59eda9f898b
3
+ size 158448
dachshund-short.mp3 ADDED
Binary file (68.8 kB). View file
 
golden-retriever-long.mp3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf5d51c12027f2c05c73a6c1c6a27dfd32b461c984885c4c48dcbea7ba130662
3
+ size 133846
golden-retriever-short.mp3 ADDED
Binary file (39.3 kB). View file
 
index.html ADDED
@@ -0,0 +1,811 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>PuppyChat 🐶</title>
8
+ <link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700&display=swap" rel="stylesheet">
9
+ <style>
10
+ :root {
11
+ --bg-primary: #f8e8b9;
12
+ --bg-secondary: #fdf6e3;
13
+ --accent-green: #7cc9a9;
14
+ --accent-brown: #8b6a4a;
15
+ --accent-yellow: #f7cf77;
16
+ --text-primary: #5a4031;
17
+ --text-secondary: #8b6a4a;
18
+ }
19
+
20
+ * {
21
+ box-sizing: border-box;
22
+ margin: 0;
23
+ padding: 0;
24
+ }
25
+
26
+ body {
27
+ font-family: 'Nunito', -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, sans-serif;
28
+ background-color: var(--bg-primary);
29
+ color: var(--text-primary);
30
+ margin: 0;
31
+ padding: 20px;
32
+ min-height: 100vh;
33
+ background-image: url('https://i.imgur.com/8NIgzrU.png');
34
+ background-size: cover;
35
+ background-position: center;
36
+ background-attachment: fixed;
37
+ overflow-x: hidden;
38
+ }
39
+
40
+ .container {
41
+ max-width: 800px;
42
+ margin: 0 auto;
43
+ height: 100%;
44
+ position: relative;
45
+ }
46
+
47
+ .app-container {
48
+ background-color: var(--bg-secondary);
49
+ border-radius: 25px;
50
+ box-shadow: 0 8px 32px rgba(138, 106, 74, 0.15);
51
+ padding: 25px;
52
+ margin-top: 30px;
53
+ position: relative;
54
+ overflow: hidden;
55
+ border: 6px solid var(--accent-brown);
56
+ }
57
+
58
+ .app-container:before {
59
+ content: '';
60
+ position: absolute;
61
+ top: 0;
62
+ left: 0;
63
+ right: 0;
64
+ height: 15px;
65
+ background: var(--accent-green);
66
+ border-radius: 10px 10px 0 0;
67
+ }
68
+
69
+ .logo {
70
+ text-align: center;
71
+ margin-bottom: 20px;
72
+ position: relative;
73
+ }
74
+
75
+ .logo h1 {
76
+ font-size: 2.5em;
77
+ color: var(--accent-brown);
78
+ text-shadow: 2px 2px 0 var(--accent-yellow);
79
+ margin-bottom: 5px;
80
+ letter-spacing: 1px;
81
+ }
82
+
83
+ .logo img {
84
+ width: 250px;
85
+ margin-bottom: 10px;
86
+ }
87
+
88
+ .logo-leaves {
89
+ position: absolute;
90
+ width: 100px;
91
+ height: 100px;
92
+ background-image: url('https://i.imgur.com/GdOS5Vv.png');
93
+ background-size: contain;
94
+ background-repeat: no-repeat;
95
+ z-index: -1;
96
+ }
97
+
98
+ .logo-leaves.left {
99
+ top: -20px;
100
+ left: 40px;
101
+ transform: rotate(-30deg);
102
+ }
103
+
104
+ .logo-leaves.right {
105
+ top: -20px;
106
+ right: 40px;
107
+ transform: rotate(30deg) scaleX(-1);
108
+ }
109
+
110
+ .breed-selector {
111
+ background-color: #fff;
112
+ padding: 20px;
113
+ border-radius: 20px;
114
+ margin-bottom: 20px;
115
+ box-shadow: 0 4px 15px rgba(138, 106, 74, 0.1);
116
+ border: 3px solid var(--accent-yellow);
117
+ }
118
+
119
+ .breed-selector h3 {
120
+ margin-bottom: 15px;
121
+ color: var(--text-secondary);
122
+ font-size: 1.2em;
123
+ }
124
+
125
+ .breed-dropdown {
126
+ width: 100%;
127
+ padding: 12px 15px;
128
+ border-radius: 15px;
129
+ border: 2px solid var(--accent-green);
130
+ font-family: 'Nunito', sans-serif;
131
+ font-size: 16px;
132
+ color: var(--text-primary);
133
+ background-color: #fafafa;
134
+ cursor: pointer;
135
+ transition: all 0.3s ease;
136
+ }
137
+
138
+ .breed-dropdown:focus {
139
+ outline: none;
140
+ box-shadow: 0 0 0 3px rgba(124, 201, 169, 0.3);
141
+ border-color: var(--accent-green);
142
+ }
143
+
144
+ .chat-container {
145
+ background: #fff;
146
+ border-radius: 20px;
147
+ box-shadow: 0 4px 15px rgba(138, 106, 74, 0.1);
148
+ padding: 20px;
149
+ height: 400px;
150
+ display: flex;
151
+ flex-direction: column;
152
+ border: 3px solid var(--accent-green);
153
+ position: relative;
154
+ }
155
+
156
+ .chat-container:after {
157
+ content: '';
158
+ position: absolute;
159
+ bottom: -10px;
160
+ right: -10px;
161
+ width: 80px;
162
+ height: 80px;
163
+ background-image: url('https://i.imgur.com/JbkTVa6.png');
164
+ background-size: contain;
165
+ background-repeat: no-repeat;
166
+ opacity: 0.7;
167
+ }
168
+
169
+ .chat-messages {
170
+ flex-grow: 1;
171
+ overflow-y: auto;
172
+ margin-bottom: 10px;
173
+ padding: 10px;
174
+ scrollbar-width: thin;
175
+ scrollbar-color: var(--accent-green) #f0f0f0;
176
+ }
177
+
178
+ .chat-messages::-webkit-scrollbar {
179
+ width: 8px;
180
+ }
181
+
182
+ .chat-messages::-webkit-scrollbar-track {
183
+ background: #f0f0f0;
184
+ border-radius: 10px;
185
+ }
186
+
187
+ .chat-messages::-webkit-scrollbar-thumb {
188
+ background-color: var(--accent-green);
189
+ border-radius: 10px;
190
+ }
191
+
192
+ .message {
193
+ margin-bottom: 15px;
194
+ padding: 12px 16px;
195
+ border-radius: 18px;
196
+ font-size: 15px;
197
+ line-height: 1.5;
198
+ position: relative;
199
+ max-width: 80%;
200
+ animation: pop-in 0.3s ease-out forwards;
201
+ }
202
+
203
+ @keyframes pop-in {
204
+ 0% {
205
+ opacity: 0;
206
+ transform: scale(0.8) translateY(10px);
207
+ }
208
+
209
+ 100% {
210
+ opacity: 1;
211
+ transform: scale(1) translateY(0);
212
+ }
213
+ }
214
+
215
+ .message.user {
216
+ background-color: var(--accent-yellow);
217
+ align-self: flex-end;
218
+ margin-left: auto;
219
+ border-bottom-right-radius: 4px;
220
+ color: var(--text-primary);
221
+ }
222
+
223
+ .message.assistant {
224
+ background-color: var(--accent-green);
225
+ align-self: flex-start;
226
+ margin-right: auto;
227
+ border-bottom-left-radius: 4px;
228
+ color: #fff;
229
+ }
230
+
231
+ .message.user::after {
232
+ content: '';
233
+ position: absolute;
234
+ bottom: 0;
235
+ right: -10px;
236
+ width: 20px;
237
+ height: 20px;
238
+ background-color: var(--accent-yellow);
239
+ clip-path: polygon(0 0, 0% 100%, 100% 100%);
240
+ }
241
+
242
+ .message.assistant::after {
243
+ content: '';
244
+ position: absolute;
245
+ bottom: 0;
246
+ left: -10px;
247
+ width: 20px;
248
+ height: 20px;
249
+ background-color: var(--accent-green);
250
+ clip-path: polygon(100% 0, 0% 100%, 100% 100%);
251
+ }
252
+
253
+ .assistant audio {
254
+ margin-top: 8px;
255
+ width: 100%;
256
+ border-radius: 12px;
257
+ background-color: rgba(255, 255, 255, 0.3);
258
+ }
259
+
260
+ .typing-indicator {
261
+ padding: 12px;
262
+ background-color: var(--accent-green);
263
+ border-radius: 18px;
264
+ margin-bottom: 10px;
265
+ display: none;
266
+ width: fit-content;
267
+ align-self: flex-start;
268
+ border-bottom-left-radius: 4px;
269
+ position: relative;
270
+ }
271
+
272
+ .typing-indicator::after {
273
+ content: '';
274
+ position: absolute;
275
+ bottom: 0;
276
+ left: -10px;
277
+ width: 20px;
278
+ height: 20px;
279
+ background-color: var(--accent-green);
280
+ clip-path: polygon(100% 0, 0% 100%, 100% 100%);
281
+ }
282
+
283
+ .dots {
284
+ display: inline-flex;
285
+ gap: 4px;
286
+ }
287
+
288
+ .dot {
289
+ width: 8px;
290
+ height: 8px;
291
+ background-color: #fff;
292
+ border-radius: 50%;
293
+ animation: bounce 1.5s infinite;
294
+ opacity: 0.8;
295
+ }
296
+
297
+ .dot:nth-child(2) {
298
+ animation-delay: 0.2s;
299
+ }
300
+
301
+ .dot:nth-child(3) {
302
+ animation-delay: 0.4s;
303
+ }
304
+
305
+ @keyframes bounce {
306
+
307
+ 0%,
308
+ 100% {
309
+ transform: translateY(0);
310
+ }
311
+
312
+ 50% {
313
+ transform: translateY(-6px);
314
+ }
315
+ }
316
+
317
+ .controls {
318
+ text-align: center;
319
+ margin-top: 25px;
320
+ position: relative;
321
+ }
322
+
323
+ button {
324
+ background-color: var(--accent-green);
325
+ color: white;
326
+ border: none;
327
+ padding: 14px 30px;
328
+ font-family: inherit;
329
+ font-size: 16px;
330
+ font-weight: 700;
331
+ cursor: pointer;
332
+ transition: all 0.3s;
333
+ border-radius: 50px;
334
+ box-shadow: 0 4px 0 var(--text-secondary);
335
+ position: relative;
336
+ overflow: hidden;
337
+ }
338
+
339
+ button:hover {
340
+ background-color: #6ab897;
341
+ transform: translateY(-2px);
342
+ }
343
+
344
+ button:active {
345
+ transform: translateY(2px);
346
+ box-shadow: 0 2px 0 var(--text-secondary);
347
+ }
348
+
349
+ button::before {
350
+ content: '';
351
+ position: absolute;
352
+ top: 0;
353
+ left: -100%;
354
+ width: 100%;
355
+ height: 100%;
356
+ background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.2), transparent);
357
+ transition: 0.5s;
358
+ }
359
+
360
+ button:hover::before {
361
+ left: 100%;
362
+ }
363
+
364
+ #audio-output {
365
+ display: none;
366
+ }
367
+
368
+ .icon-with-spinner {
369
+ display: flex;
370
+ align-items: center;
371
+ justify-content: center;
372
+ gap: 12px;
373
+ min-width: 180px;
374
+ }
375
+
376
+ .spinner {
377
+ width: 20px;
378
+ height: 20px;
379
+ border: 2px solid #ffffff;
380
+ border-top-color: transparent;
381
+ border-radius: 50%;
382
+ animation: spin 1s linear infinite;
383
+ flex-shrink: 0;
384
+ }
385
+
386
+ @keyframes spin {
387
+ to {
388
+ transform: rotate(360deg);
389
+ }
390
+ }
391
+
392
+ .pulse-container {
393
+ display: flex;
394
+ align-items: center;
395
+ justify-content: center;
396
+ gap: 12px;
397
+ min-width: 180px;
398
+ }
399
+
400
+ .pulse-circle {
401
+ width: 20px;
402
+ height: 20px;
403
+ border-radius: 50%;
404
+ background-color: #ffffff;
405
+ opacity: 0.7;
406
+ flex-shrink: 0;
407
+ transform: translateX(-0%) scale(var(--audio-level, 1));
408
+ transition: transform 0.1s ease;
409
+ }
410
+
411
+ .toast {
412
+ position: fixed;
413
+ top: 20px;
414
+ left: 50%;
415
+ transform: translateX(-50%);
416
+ padding: 16px 24px;
417
+ border-radius: 50px;
418
+ font-size: 14px;
419
+ z-index: 1000;
420
+ display: none;
421
+ box-shadow: 0 4px 15px rgba(0, 0, 0, 0.2);
422
+ }
423
+
424
+ .toast.error {
425
+ background-color: #ff6b6b;
426
+ color: white;
427
+ }
428
+
429
+ .toast.warning {
430
+ background-color: #ffd166;
431
+ color: var(--text-primary);
432
+ }
433
+
434
+ /* Animal Crossing cloud decoration */
435
+ .cloud {
436
+ position: absolute;
437
+ background: #fff;
438
+ border-radius: 50%;
439
+ opacity: 0.7;
440
+ filter: blur(5px);
441
+ }
442
+
443
+ .cloud-1 {
444
+ width: 100px;
445
+ height: 60px;
446
+ top: 5%;
447
+ right: 10%;
448
+ animation: float 20s ease-in-out infinite;
449
+ }
450
+
451
+ .cloud-2 {
452
+ width: 70px;
453
+ height: 40px;
454
+ top: 15%;
455
+ left: 5%;
456
+ animation: float 15s ease-in-out infinite 2s;
457
+ }
458
+
459
+ @keyframes float {
460
+
461
+ 0%,
462
+ 100% {
463
+ transform: translateY(0) translateX(0);
464
+ }
465
+
466
+ 50% {
467
+ transform: translateY(-20px) translateX(20px);
468
+ }
469
+ }
470
+
471
+ /* Responsive styles */
472
+ @media (max-width: 768px) {
473
+ .container {
474
+ padding: 10px;
475
+ }
476
+
477
+ .app-container {
478
+ padding: 15px;
479
+ }
480
+
481
+ .logo h1 {
482
+ font-size: 2em;
483
+ }
484
+
485
+ .logo img {
486
+ width: 180px;
487
+ }
488
+
489
+ .message {
490
+ max-width: 90%;
491
+ }
492
+ }
493
+ </style>
494
+ </head>
495
+
496
+ <body>
497
+ <div id="error-toast" class="toast"></div>
498
+ <div class="cloud cloud-1"></div>
499
+ <div class="cloud cloud-2"></div>
500
+
501
+ <div class="container">
502
+ <div class="logo">
503
+ <div class="logo-leaves left"></div>
504
+ <div class="logo-leaves right"></div>
505
+ <h1>PuppyChat 🐶</h1>
506
+ </div>
507
+
508
+ <div class="app-container">
509
+ <div class="breed-selector">
510
+ <h3>Choose your furry friend:</h3>
511
<select id="breed-dropdown" class="breed-dropdown">
    <!-- The value must stay "chiahuahua": the server validates it and uses it
         to pick the audio file. Only the visible label is spelled correctly. -->
    <option value="chiahuahua">Chihuahua</option>
    <option value="dachshund">Dachshund</option>
    <option value="golden-retriever">Golden Retriever</option>
</select>
516
+ </div>
517
+
518
+ <div class="chat-container">
519
+ <div class="chat-messages" id="chat-messages"></div>
520
+ <div class="typing-indicator" id="typing-indicator">
521
+ <div class="dots">
522
+ <div class="dot"></div>
523
+ <div class="dot"></div>
524
+ <div class="dot"></div>
525
+ </div>
526
+ </div>
527
+ </div>
528
+ </div>
529
+
530
+ <div class="controls">
531
+ <button id="start-button">Start Conversation</button>
532
+ </div>
533
+ </div>
534
+ <audio id="audio-output"></audio>
535
+
536
+ <script>
537
+ let peerConnection;
538
+ let webrtc_id;
539
+ const startButton = document.getElementById('start-button');
540
+ const chatMessages = document.getElementById('chat-messages');
541
+ const breedDropdown = document.getElementById('breed-dropdown');
542
+
543
+ let audioLevel = 0;
544
+ let animationFrame;
545
+ let audioContext, analyser, audioSource;
546
+ let eventSource;
547
+
548
// Render the start button according to the peer connection's state:
// a spinner while connecting, a pulsing "stop" control once connected,
// and the plain start label otherwise.
function updateButtonState() {
    const button = document.getElementById('start-button');
    const state = peerConnection ? peerConnection.connectionState : null;

    if (state === 'connecting' || state === 'new') {
        button.innerHTML = `
            <div class="icon-with-spinner">
                <div class="spinner"></div>
                <span>Connecting...</span>
            </div>
        `;
        return;
    }

    if (state === 'connected') {
        button.innerHTML = `
            <div class="pulse-container">
                <div class="pulse-circle"></div>
                <span>Stop Conversation</span>
            </div>
        `;
        return;
    }

    button.innerHTML = 'Start Conversation';
}
568
+
569
// Feed the given media stream into an analyser and, on every animation frame,
// publish the average frequency-bin level (0..1) as `audioLevel` and as the
// `--audio-level` CSS variable (1 + level) on the pulse circle, if present.
function setupAudioVisualization(stream) {
    const AudioContextClass = window.AudioContext || window.webkitAudioContext;
    audioContext = new AudioContextClass();
    analyser = audioContext.createAnalyser();
    audioSource = audioContext.createMediaStreamSource(stream);
    audioSource.connect(analyser);
    analyser.fftSize = 64;
    const levels = new Uint8Array(analyser.frequencyBinCount);

    const tick = () => {
        analyser.getByteFrequencyData(levels);
        let total = 0;
        for (const level of levels) {
            total += level;
        }
        const average = total / levels.length;
        audioLevel = average / 255;

        const pulseCircle = document.querySelector('.pulse-circle');
        if (pulseCircle) {
            pulseCircle.style.setProperty('--audio-level', 1 + audioLevel);
        }

        // Keep the handle so stop() can cancel the loop.
        animationFrame = requestAnimationFrame(tick);
    };
    tick();
}
591
+
592
// Show `message` in the error toast and hide the toast again after 5 seconds.
function showError(message) {
    const toast = document.getElementById('error-toast');
    Object.assign(toast, { textContent: message, className: 'toast error' });
    toast.style.display = 'block';

    const hideToast = () => {
        toast.style.display = 'none';
    };
    setTimeout(hideToast, 5000);
}
603
+
604
// Handle a JSON message received on the WebRTC data channel.
//   type "error"      -> show the message in the error toast
//   type "send_input" -> POST the current webrtc_id and selected breed to /input_hook
//   type "log"        -> show the typing indicator on "pause_detected",
//                        hide it on "response_starting"
function handleMessage(event) {
    const eventJson = JSON.parse(event.data);
    const typingIndicator = document.getElementById('typing-indicator');

    if (eventJson.type === "error") {
        showError(eventJson.message);
    } else if (eventJson.type === "send_input") {
        fetch('/input_hook', {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json',
            },
            body: JSON.stringify({
                webrtc_id: webrtc_id,
                breed: breedDropdown.value
            })
        })
            .then((res) => {
                // fetch only rejects on network failure; surface HTTP errors too.
                if (!res.ok) {
                    throw new Error(`input_hook responded with ${res.status}`);
                }
            })
            .catch((err) => {
                // Without this, a failed request is an unhandled promise rejection.
                console.error('Failed to send input:', err);
                showError('Failed to send your selection. Please try again.');
            });
    } else if (eventJson.type === "log") {
        if (eventJson.data === "pause_detected") {
            typingIndicator.style.display = 'block';
            chatMessages.scrollTop = chatMessages.scrollHeight;
        } else if (eventJson.data === "response_starting") {
            typingIndicator.style.display = 'none';
        }
    }
}
630
+
631
// Open a WebRTC session with the server:
//   1. create the peer connection from the server-injected RTC configuration,
//   2. capture the microphone and attach its tracks,
//   3. play whatever audio track the server sends back,
//   4. exchange the SDP offer/answer and ICE candidates via POST /webrtc/offer,
//   5. subscribe to /outputs (server-sent events) to render chat messages.
// Any failure shows an error toast and tears the session down with stop().
async function setupWebRTC() {
    // __RTC_CONFIGURATION__ is replaced with a JSON object when the page is served.
    const config = __RTC_CONFIGURATION__;
    peerConnection = new RTCPeerConnection(config);

    // Warn the user if the connection has not come up after 5 seconds.
    const timeoutId = setTimeout(() => {
        const toast = document.getElementById('error-toast');
        toast.textContent = "Connection is taking longer than usual. Are you on a VPN?";
        toast.className = 'toast warning';
        toast.style.display = 'block';

        // Hide warning after 5 seconds
        setTimeout(() => {
            toast.style.display = 'none';
        }, 5000);
    }, 5000);

    try {
        const stream = await navigator.mediaDevices.getUserMedia({
            audio: true
        });

        setupAudioVisualization(stream);

        stream.getTracks().forEach(track => {
            peerConnection.addTrack(track, stream);
        });

        // Add track event listener to handle incoming audio
        const audioOutput = document.getElementById('audio-output');
        peerConnection.addEventListener('track', (evt) => {
            if (audioOutput && audioOutput.srcObject !== evt.streams[0]) {
                audioOutput.srcObject = evt.streams[0];
                audioOutput.play();
                console.log('Track received:', evt.track.kind);
            }
        });

        const dataChannel = peerConnection.createDataChannel('text');
        dataChannel.onmessage = handleMessage;

        const offer = await peerConnection.createOffer();
        await peerConnection.setLocalDescription(offer);

        peerConnection.onicecandidate = ({ candidate }) => {
            if (candidate) {
                console.debug("Sending ICE candidate", candidate);
                fetch('/webrtc/offer', {
                    method: 'POST',
                    headers: { 'Content-Type': 'application/json' },
                    body: JSON.stringify({
                        candidate: candidate.toJSON(),
                        webrtc_id: webrtc_id,
                        type: "ice-candidate",
                    })
                })
            }
        };

        peerConnection.addEventListener('connectionstatechange', () => {
            console.log('connectionstatechange', peerConnection.connectionState);
            if (peerConnection.connectionState === 'connected') {
                clearTimeout(timeoutId);
                const toast = document.getElementById('error-toast');
                toast.style.display = 'none';
            }
            updateButtonState();
        });

        // slice(2) drops only the leading "0." of the base-36 expansion.
        // substring(7) discarded most of the digits and could return a very
        // short or even empty id when the expansion happened to be short.
        webrtc_id = Math.random().toString(36).slice(2);

        const response = await fetch('/webrtc/offer', {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: JSON.stringify({
                sdp: peerConnection.localDescription.sdp,
                type: peerConnection.localDescription.type,
                webrtc_id: webrtc_id
            })
        });

        const serverResponse = await response.json();

        if (serverResponse.status === 'failed') {
            // Cancel the pending "taking longer than usual" warning so it
            // cannot replace the error shown below a few seconds later.
            clearTimeout(timeoutId);
            showError(serverResponse.meta.error === 'concurrency_limit_reached'
                ? `Too many connections. Maximum limit is ${serverResponse.meta.limit}`
                : serverResponse.meta.error);
            stop();
            return;
        }

        await peerConnection.setRemoteDescription(serverResponse);

        eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
        eventSource.addEventListener("output", (event) => {
            const eventJson = JSON.parse(event.data);
            console.log(eventJson);

            // Add the message based on role
            addMessage(eventJson.role, eventJson.content);
        });
    } catch (err) {
        clearTimeout(timeoutId);
        console.error('Error setting up WebRTC:', err);
        showError('Failed to establish connection. Please try again.');
        stop();
    }
}
738
+
739
// Append a chat bubble for `role` ('user' or 'assistant') and scroll to it.
// User bubbles show the transcript text. Assistant bubbles whose content is a
// /files/ path show a fixed caption plus an audio player for that path; other
// assistant content is shown as plain text.
async function addMessage(role, content) {
    const bubble = document.createElement('div');
    bubble.classList.add('message', role);

    // A new message means the reply has arrived, so the typing dots can go.
    document.getElementById('typing-indicator').style.display = 'none';

    const isBark = role === 'assistant' && content.startsWith('/files/');
    if (isBark) {
        const player = document.createElement('audio');
        player.controls = true;
        player.src = content;
        bubble.textContent = "Woof woof! 🐶";
        bubble.appendChild(player);
    } else if (role === 'user' || role === 'assistant') {
        bubble.textContent = content;
    }

    chatMessages.appendChild(bubble);
    chatMessages.scrollTop = chatMessages.scrollHeight;
}
765
+
766
// Tear down the current session: close the event stream, stop the level meter
// and its audio context, stop all transceivers and local tracks, close the
// peer connection, then refresh the button and reset the audio level.
function stop() {
    if (eventSource) {
        eventSource.close();
        eventSource = null;
    }

    if (animationFrame) {
        cancelAnimationFrame(animationFrame);
    }

    if (audioContext) {
        audioContext.close();
        audioContext = null;
        analyser = null;
        audioSource = null;
    }

    if (peerConnection) {
        if (peerConnection.getTransceivers) {
            for (const transceiver of peerConnection.getTransceivers()) {
                if (transceiver.stop) {
                    transceiver.stop();
                }
            }
        }

        if (peerConnection.getSenders) {
            for (const sender of peerConnection.getSenders()) {
                // Stopping the sender's track stops the local capture track.
                if (sender.track && sender.track.stop) {
                    sender.track.stop();
                }
            }
        }

        peerConnection.close();
    }

    updateButtonState();
    audioLevel = 0;
}
800
+
801
// The button toggles the session: stop when connected, otherwise (re)connect.
startButton.addEventListener('click', () => {
    const isConnected = Boolean(peerConnection) && peerConnection.connectionState === 'connected';
    if (isConnected) {
        stop();
    } else {
        setupWebRTC();
    }
});
808
+ </script>
809
+ </body>
810
+
811
+ </html>
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ fastrtc[vad]
2
+ soundfile
3
+ httpx