freddyaboulton HF Staff committed on
Commit
63b1bda
·
verified ·
1 Parent(s): 397921a

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +2 -7
  2. app.py +26 -6
  3. index.html +12 -6
  4. requirements.txt +2 -1
README.md CHANGED
@@ -4,17 +4,12 @@ emoji: 🗣️
4
  colorFrom: purple
5
  colorTo: red
6
  sdk: gradio
7
- sdk_version: 5.24.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
  short_description: Talk to OpenAI using their multimodal API
12
- tags:
13
- - webrtc
14
- - websocket
15
- - gradio
16
- - secret|HF_TOKEN
17
- - secret|OPENAI_API_KEY
18
  ---
19
 
20
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
4
  colorFrom: purple
5
  colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.16.0
8
  app_file: app.py
9
  pinned: false
10
  license: mit
11
  short_description: Talk to OpenAI using their multimodal API
12
+ tags: [webrtc, websocket, gradio, secret|HF_TOKEN, secret|OPENAI_API_KEY]
 
 
 
 
 
13
  ---
14
 
15
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -13,7 +13,7 @@ from fastrtc import (
13
  AdditionalOutputs,
14
  AsyncStreamHandler,
15
  Stream,
16
- get_cloudflare_turn_credentials_async,
17
  wait_for_item,
18
  )
19
  from gradio.utils import get_space
@@ -50,12 +50,32 @@ class OpenAIHandler(AsyncStreamHandler):
50
  model="gpt-4o-mini-realtime-preview-2024-12-17"
51
  ) as conn:
52
  await conn.session.update(
53
- session={"turn_detection": {"type": "server_vad"}}
 
 
 
 
 
 
54
  )
55
  self.connection = conn
56
  async for event in self.connection:
 
 
 
 
 
 
 
 
 
 
57
  if event.type == "response.audio_transcript.done":
58
- await self.output_queue.put(AdditionalOutputs(event))
 
 
 
 
59
  if event.type == "response.audio.delta":
60
  await self.output_queue.put(
61
  (
@@ -97,7 +117,7 @@ stream = Stream(
97
  additional_inputs=[chatbot],
98
  additional_outputs=[chatbot],
99
  additional_outputs_handler=update_chatbot,
100
- rtc_configuration=get_cloudflare_turn_credentials_async if get_space() else None,
101
  concurrency_limit=5 if get_space() else None,
102
  time_limit=90 if get_space() else None,
103
  )
@@ -109,7 +129,7 @@ stream.mount(app)
109
 
110
  @app.get("/")
111
  async def _():
112
- rtc_config = await get_cloudflare_turn_credentials_async() if get_space() else None
113
  html_content = (cur_dir / "index.html").read_text()
114
  html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
115
  return HTMLResponse(content=html_content)
@@ -121,7 +141,7 @@ def _(webrtc_id: str):
121
  import json
122
 
123
  async for output in stream.output_stream(webrtc_id):
124
- s = json.dumps({"role": "assistant", "content": output.args[0].transcript})
125
  yield f"event: output\ndata: {s}\n\n"
126
 
127
  return StreamingResponse(output_stream(), media_type="text/event-stream")
 
13
  AdditionalOutputs,
14
  AsyncStreamHandler,
15
  Stream,
16
+ get_twilio_turn_credentials,
17
  wait_for_item,
18
  )
19
  from gradio.utils import get_space
 
50
  model="gpt-4o-mini-realtime-preview-2024-12-17"
51
  ) as conn:
52
  await conn.session.update(
53
+ session={
54
+ "turn_detection": {"type": "server_vad"},
55
+ "input_audio_transcription": {
56
+ "model": "whisper-1",
57
+ "language": "en",
58
+ },
59
+ }
60
  )
61
  self.connection = conn
62
  async for event in self.connection:
63
+ # Handle interruptions
64
+ if event.type == "input_audio_buffer.speech_started":
65
+ self.clear_queue()
66
+ if (
67
+ event.type
68
+ == "conversation.item.input_audio_transcription.completed"
69
+ ):
70
+ await self.output_queue.put(
71
+ AdditionalOutputs({"role": "user", "content": event.transcript})
72
+ )
73
  if event.type == "response.audio_transcript.done":
74
+ await self.output_queue.put(
75
+ AdditionalOutputs(
76
+ {"role": "assistant", "content": event.transcript}
77
+ )
78
+ )
79
  if event.type == "response.audio.delta":
80
  await self.output_queue.put(
81
  (
 
117
  additional_inputs=[chatbot],
118
  additional_outputs=[chatbot],
119
  additional_outputs_handler=update_chatbot,
120
+ rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
121
  concurrency_limit=5 if get_space() else None,
122
  time_limit=90 if get_space() else None,
123
  )
 
129
 
130
  @app.get("/")
131
  async def _():
132
+ rtc_config = get_twilio_turn_credentials() if get_space() else None
133
  html_content = (cur_dir / "index.html").read_text()
134
  html_content = html_content.replace("__RTC_CONFIGURATION__", json.dumps(rtc_config))
135
  return HTMLResponse(content=html_content)
 
141
  import json
142
 
143
  async for output in stream.output_stream(webrtc_id):
144
+ s = json.dumps(output.args[0])
145
  yield f"event: output\ndata: {s}\n\n"
146
 
147
  return StreamingResponse(output_stream(), media_type="text/event-stream")
index.html CHANGED
@@ -45,20 +45,26 @@
45
 
46
  .message {
47
  margin-bottom: 20px;
48
- padding: 12px;
49
- border-radius: 4px;
50
  font-size: 16px;
51
  line-height: 1.5;
 
 
52
  }
53
 
54
  .message.user {
55
- background-color: #1a1a1a;
56
- margin-left: 20%;
 
 
57
  }
58
 
59
  .message.assistant {
60
  background-color: #262626;
61
- margin-right: 20%;
 
 
62
  }
63
 
64
  .controls {
@@ -435,7 +441,7 @@
435
  const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
436
  eventSource.addEventListener("output", (event) => {
437
  const eventJson = JSON.parse(event.data);
438
- addMessage("assistant", eventJson.content);
439
 
440
  });
441
  } catch (err) {
 
45
 
46
  .message {
47
  margin-bottom: 20px;
48
+ padding: 12px 16px;
49
+ border-radius: 8px;
50
  font-size: 16px;
51
  line-height: 1.5;
52
+ max-width: 70%;
53
+ clear: both;
54
  }
55
 
56
  .message.user {
57
+ background-color: #2c2c2c;
58
+ float: right;
59
+ border-bottom-right-radius: 2px;
60
+ border: 1px solid #404040;
61
  }
62
 
63
  .message.assistant {
64
  background-color: #262626;
65
+ float: left;
66
+ border-bottom-left-radius: 2px;
67
+ border: 1px solid #333;
68
  }
69
 
70
  .controls {
 
441
  const eventSource = new EventSource('/outputs?webrtc_id=' + webrtc_id);
442
  eventSource.addEventListener("output", (event) => {
443
  const eventJson = JSON.parse(event.data);
444
+ addMessage(eventJson.role, eventJson.content);
445
 
446
  });
447
  } catch (err) {
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
- fastrtc[vad]==0.0.20
2
  openai
 
3
  python-dotenv
 
1
+ fastrtc[vad]==0.0.20.rc2
2
  openai
3
+ twilio
4
  python-dotenv