Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Upload folder using huggingface_hub
Browse files
- README.md +2 -7
- app.py +26 -6
- index.html +12 -6
- requirements.txt +2 -1
README.md
CHANGED
@@ -4,17 +4,12 @@ emoji: 🗣️
|
|
4 |
colorFrom: purple
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 5.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
short_description: Talk to OpenAI using their multimodal API
|
12 |
-
tags:
|
13 |
-
- webrtc
|
14 |
-
- websocket
|
15 |
-
- gradio
|
16 |
-
- secret|HF_TOKEN
|
17 |
-
- secret|OPENAI_API_KEY
|
18 |
---
|
19 |
|
20 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
4 |
colorFrom: purple
|
5 |
colorTo: red
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.16.0
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
11 |
short_description: Talk to OpenAI using their multimodal API
|
12 |
+
tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|OPENAI_API_KEY]
|
|
|
|
|
|
|
|
|
|
|
13 |
---
|
14 |
|
15 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
app.py
CHANGED
@@ -13,7 +13,7 @@ from fastrtc import (
|
|
13 |
AdditionalOutputs,
|
14 |
AsyncStreamHandler,
|
15 |
Stream,
|
16 |
-
|
17 |
wait_for_item,
|
18 |
)
|
19 |
from gradio.utils import get_space
|
@@ -50,12 +50,32 @@ class OpenAIHandler(AsyncStreamHandler):
|
|
50 |
model="gpt-4o-mini-realtime-preview-2024-12-17"
|
51 |
) as conn:
|
52 |
await conn.session.update(
|
53 |
-
session={
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
)
|
55 |
self.connection = conn
|
56 |
async for event in self.connection:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
if event.type == "response.audio_transcript.done":
|
58 |
-
await self.output_queue.put(
|
|
|
|
|
|
|
|
|
59 |
if event.type == "response.audio.delta":
|
60 |
await self.output_queue.put(
|
61 |
(
|
@@ -97,7 +117,7 @@ stream = Stream(
|
|
97 |
additional_inputs=[chatbot],
|
98 |
additional_outputs=[chatbot],
|
99 |
additional_outputs_handler=update_chatbot,
|
100 |
-
rtc_configuration=
|
101 |
concurrency_limit=5 if get_space() else None,
|
102 |
time_limit=90 if get_space() else None,
|
103 |
)
|
@@ -109,7 +129,7 @@ stream.mount(app)
|
|
109 |
|
110 |
@app.get("/")
|
111 |
async def _():
|
112 |
-
rtc_config =
|
113 |
html_content = (cur_dir / "index.html").read_text()
|
114 |
html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
|
115 |
return HTMLResponse(content=html_content)
|
@@ -121,7 +141,7 @@ def _(webrtc_id: str):
|
|
121 |
import json
|
122 |
|
123 |
async for output in stream.output_stream(webrtc_id):
|
124 |
-
s = json.dumps(
|
125 |
yield f"event: output\ndata: {s}\n\n"
|
126 |
|
127 |
return StreamingResponse(output_stream(), media_type="text/event-stream")
|
|
|
13 |
AdditionalOutputs,
|
14 |
AsyncStreamHandler,
|
15 |
Stream,
|
16 |
+
get_twilio_turn_credentials,
|
17 |
wait_for_item,
|
18 |
)
|
19 |
from gradio.utils import get_space
|
|
|
50 |
model="gpt-4o-mini-realtime-preview-2024-12-17"
|
51 |
) as conn:
|
52 |
await conn.session.update(
|
53 |
+
session={
|
54 |
+
"turn_detection": {"type": "server_vad"},
|
55 |
+
"input_audio_transcription": {
|
56 |
+
"model": "whisper-1",
|
57 |
+
"language": "en",
|
58 |
+
},
|
59 |
+
}
|
60 |
)
|
61 |
self.connection = conn
|
62 |
async for event in self.connection:
|
63 |
+
# Handle interruptions
|
64 |
+
if event.type == "input_audio_buffer.speech_started":
|
65 |
+
self.clear_queue()
|
66 |
+
if (
|
67 |
+
event.type
|
68 |
+
== "conversation.item.input_audio_transcription.completed"
|
69 |
+
):
|
70 |
+
await self.output_queue.put(
|
71 |
+
AdditionalOutputs({"role": "user", "content": event.transcript})
|
72 |
+
)
|
73 |
if event.type == "response.audio_transcript.done":
|
74 |
+
await self.output_queue.put(
|
75 |
+
AdditionalOutputs(
|
76 |
+
{"role": "assistant", "content": event.transcript}
|
77 |
+
)
|
78 |
+
)
|
79 |
if event.type == "response.audio.delta":
|
80 |
await self.output_queue.put(
|
81 |
(
|
|
|
117 |
additional_inputs=[chatbot],
|
118 |
additional_outputs=[chatbot],
|
119 |
additional_outputs_handler=update_chatbot,
|
120 |
+
rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
|
121 |
concurrency_limit=5 if get_space() else None,
|
122 |
time_limit=90 if get_space() else None,
|
123 |
)
|
|
|
129 |
|
130 |
@app.get("/")
|
131 |
async def _():
|
132 |
+
rtc_config = get_twilio_turn_credentials() if get_space() else None
|
133 |
html_content = (cur_dir / "index.html").read_text()
|
134 |
html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
|
135 |
return HTMLResponse(content=html_content)
|
|
|
141 |
import json
|
142 |
|
143 |
async for output in stream.output_stream(webrtc_id):
|
144 |
+
s = json.dumps(output.args[0])
|
145 |
yield f"event: output\ndata: {s}\n\n"
|
146 |
|
147 |
return StreamingResponse(output_stream(), media_type="text/event-stream")
|
index.html
CHANGED
@@ -45,20 +45,26 @@
|
|
45 |
|
46 |
.message {
|
47 |
margin-bottom: 20px;
|
48 |
-
padding: 12px;
|
49 |
-
border-radius:
|
50 |
font-size: 16px;
|
51 |
line-height: 1.5;
|
|
|
|
|
52 |
}
|
53 |
|
54 |
.message.user {
|
55 |
-
background-color: #
|
56 |
-
|
|
|
|
|
57 |
}
|
58 |
|
59 |
.message.assistant {
|
60 |
background-color: #262626;
|
61 |
-
|
|
|
|
|
62 |
}
|
63 |
|
64 |
.controls {
|
@@ -435,7 +441,7 @@
|
|
435 |
const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
|
436 |
eventSource.addEventListener("output", (event) => {
|
437 |
const eventJson = JSON.parse(event.data);
|
438 |
-
addMessage(
|
439 |
|
440 |
});
|
441 |
} catch (err) {
|
|
|
45 |
|
46 |
.message {
|
47 |
margin-bottom: 20px;
|
48 |
+
padding: 12px 16px;
|
49 |
+
border-radius: 8px;
|
50 |
font-size: 16px;
|
51 |
line-height: 1.5;
|
52 |
+
max-width: 70%;
|
53 |
+
clear: both;
|
54 |
}
|
55 |
|
56 |
.message.user {
|
57 |
+
background-color: #2c2c2c;
|
58 |
+
float: right;
|
59 |
+
border-bottom-right-radius: 2px;
|
60 |
+
border: 1px solid #404040;
|
61 |
}
|
62 |
|
63 |
.message.assistant {
|
64 |
background-color: #262626;
|
65 |
+
float: left;
|
66 |
+
border-bottom-left-radius: 2px;
|
67 |
+
border: 1px solid #333;
|
68 |
}
|
69 |
|
70 |
.controls {
|
|
|
441 |
const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
|
442 |
eventSource.addEventListener("output", (event) => {
|
443 |
const eventJson = JSON.parse(event.data);
|
444 |
+
addMessage(eventJson.role, eventJson.content);
|
445 |
|
446 |
});
|
447 |
} catch (err) {
|
requirements.txt
CHANGED
@@ -1,3 +1,4 @@
|
|
1 |
-
fastrtc[vad]==0.0.20
|
2 |
openai
|
|
|
3 |
python-dotenv
|
|
|
1 |
+
fastrtc[vad]==0.0.20.rc2
|
2 |
openai
|
3 |
+
twilio
|
4 |
python-dotenv
|