Spaces:

fastrtc
/

talk-to-openai

Running on CPU Upgrade

App Files Files Community

freddyaboulton HF Staff committed on Apr 15

Commit

63b1bda

verified ·

1 Parent(s): 397921a

Upload folder using huggingface_hub

Browse files

Files changed (4) hide show

README.md +2 -7
app.py +26 -6
index.html +12 -6
requirements.txt +2 -1

README.md CHANGED Viewed

@@ -4,17 +4,12 @@ emoji: 🗣️
 colorFrom: purple
 colorTo: red
 sdk: gradio
-sdk_version: 5.24.0
 app_file: app.py
 pinned: false
 license: mit
 short_description: Talk to OpenAI using their multimodal API
-tags:
-- webrtc
-- websocket
-- gradio
-- secret|HF_TOKEN
-- secret|OPENAI_API_KEY
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 colorFrom: purple
 colorTo: red
 sdk: gradio
+sdk_version: 5.16.0
 app_file: app.py
 pinned: false
 license: mit
 short_description: Talk to OpenAI using their multimodal API
+tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|OPENAI_API_KEY]
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

@@ -13,7 +13,7 @@ from fastrtc import (
     AdditionalOutputs,
     AsyncStreamHandler,
     Stream,
-    get_cloudflare_turn_credentials_async,
     wait_for_item,
 )
 from gradio.utils import get_space
@@ -50,12 +50,32 @@ class OpenAIHandler(AsyncStreamHandler):
             model="gpt-4o-mini-realtime-preview-2024-12-17"
         ) as conn:
             await conn.session.update(
-                session={"turn_detection": {"type": "server_vad"}}
             )
             self.connection = conn
             async for event in self.connection:
                 if event.type == "response.audio_transcript.done":
-                    await self.output_queue.put(AdditionalOutputs(event))
                 if event.type == "response.audio.delta":
                     await self.output_queue.put(
                         (
@@ -97,7 +117,7 @@ stream = Stream(
     additional_inputs=[chatbot],
     additional_outputs=[chatbot],
     additional_outputs_handler=update_chatbot,
-    rtc_configuration=get_cloudflare_turn_credentials_async if get_space() else None,
     concurrency_limit=5 if get_space() else None,
     time_limit=90 if get_space() else None,
 )
@@ -109,7 +129,7 @@ stream.mount(app)
 @app.get("/")
 async def _():
-    rtc_config = await get_cloudflare_turn_credentials_async() if get_space() else None
     html_content = (cur_dir / "index.html").read_text()
     html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
     return HTMLResponse(content=html_content)
@@ -121,7 +141,7 @@ def _(webrtc_id: str):
         import json
         async for output in stream.output_stream(webrtc_id):
-            s = json.dumps({"role": "assistant", "content": output.args[0].transcript})
             yield f"event: output\ndata: {s}\n\n"
     return StreamingResponse(output_stream(), media_type="text/event-stream")

     AdditionalOutputs,
     AsyncStreamHandler,
     Stream,
+    get_twilio_turn_credentials,
     wait_for_item,
 )
 from gradio.utils import get_space
             model="gpt-4o-mini-realtime-preview-2024-12-17"
         ) as conn:
             await conn.session.update(
+                session={
+                    "turn_detection": {"type": "server_vad"},
+                    "input_audio_transcription": {
+                        "model": "whisper-1",
+                        "language": "en",
+                    },
+                }
             )
             self.connection = conn
             async for event in self.connection:
+                # Handle interruptions
+                if event.type == "input_audio_buffer.speech_started":
+                    self.clear_queue()
+                if (
+                    event.type
+                    == "conversation.item.input_audio_transcription.completed"
+                ):
+                    await self.output_queue.put(
+                        AdditionalOutputs({"role": "user", "content": event.transcript})
+                    )
                 if event.type == "response.audio_transcript.done":
+                    await self.output_queue.put(
+                        AdditionalOutputs(
+                            {"role": "assistant", "content": event.transcript}
+                        )
+                    )
                 if event.type == "response.audio.delta":
                     await self.output_queue.put(
                         (
     additional_inputs=[chatbot],
     additional_outputs=[chatbot],
     additional_outputs_handler=update_chatbot,
+    rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
     concurrency_limit=5 if get_space() else None,
     time_limit=90 if get_space() else None,
 )
 @app.get("/")
 async def _():
+    rtc_config = get_twilio_turn_credentials() if get_space() else None
     html_content = (cur_dir / "index.html").read_text()
     html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
     return HTMLResponse(content=html_content)
         import json
         async for output in stream.output_stream(webrtc_id):
+            s = json.dumps(output.args[0])
             yield f"event: output\ndata: {s}\n\n"
     return StreamingResponse(output_stream(), media_type="text/event-stream")

index.html CHANGED Viewed

@@ -45,20 +45,26 @@
         .message {
             margin-bottom: 20px;
-            padding: 12px;
-            border-radius: 4px;
             font-size: 16px;
             line-height: 1.5;
         }
         .message.user {
-            background-color: #1a1a1a;
-            margin-left: 20%;
         }
         .message.assistant {
             background-color: #262626;
-            margin-right: 20%;
         }
         .controls {
@@ -435,7 +441,7 @@
                 const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
                 eventSource.addEventListener("output", (event) => {
                     const eventJson = JSON.parse(event.data);
-                    addMessage("assistant", eventJson.content);
                 });
             } catch (err) {

         .message {
             margin-bottom: 20px;
+            padding: 12px 16px;
+            border-radius: 8px;
             font-size: 16px;
             line-height: 1.5;
+            max-width: 70%;
+            clear: both;
         }
         .message.user {
+            background-color: #2c2c2c;
+            float: right;
+            border-bottom-right-radius: 2px;
+            border: 1px solid #404040;
         }
         .message.assistant {
             background-color: #262626;
+            float: left;
+            border-bottom-left-radius: 2px;
+            border: 1px solid #333;
         }
         .controls {
                 const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
                 eventSource.addEventListener("output", (event) => {
                     const eventJson = JSON.parse(event.data);
+                    addMessage(eventJson.role, eventJson.content);
                 });
             } catch (err) {

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
-fastrtc[vad]==0.0.20
 openai
 python-dotenv

+fastrtc[vad]==0.0.20.rc2
 openai
+twilio
 python-dotenv