freddyaboulton HF Staff committed on
Commit
3b4d26e
·
verified ·
1 Parent(s): 2106106

Upload folder using huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +32 -43
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import subprocess
2
 
3
- subprocess.run(["pip", "install", "fastrtc==0.0.3.post7"])
4
 
5
  import asyncio
6
  import base64
@@ -15,10 +15,9 @@ from fastrtc import (
15
  AsyncStreamHandler,
16
  Stream,
17
  get_twilio_turn_credentials,
18
- WebRTCError,
19
  audio_to_float32,
 
20
  )
21
- from fastapi import FastAPI
22
  from phonic.client import PhonicSTSClient, get_voices
23
 
24
  load_dotenv()
@@ -42,47 +41,38 @@ class PhonicHandler(AsyncStreamHandler):
42
  async def start_up(self):
43
  await self.wait_for_args()
44
  voice_id = self.latest_args[1]
45
- try:
46
- async with PhonicSTSClient(STS_URI, API_KEY) as client:
47
- self.client = client
48
- sts_stream = client.sts( # type: ignore
49
- input_format="pcm_44100",
50
- output_format="pcm_44100",
51
- system_prompt="You are a helpful voice assistant. Respond conversationally.",
52
- # welcome_message="Hello! I'm your voice assistant. How can I help you today?",
53
- voice_id=voice_id,
54
- )
55
- async for message in sts_stream:
56
- message_type = message.get("type")
57
- if message_type == "audio_chunk":
58
- audio_b64 = message["audio"]
59
- audio_bytes = base64.b64decode(audio_b64)
60
- await self.output_queue.put(
61
- (SAMPLE_RATE, np.frombuffer(audio_bytes, dtype=np.int16))
62
- )
63
- if text := message.get("text"):
64
- msg = {"role": "assistant", "content": text}
65
- await self.output_queue.put(AdditionalOutputs(msg))
66
- elif message_type == "input_text":
67
- msg = {"role": "user", "content": message["text"]}
68
  await self.output_queue.put(AdditionalOutputs(msg))
69
- except Exception as e:
70
- raise WebRTCError(f"Error starting up: {e}")
 
71
 
72
  async def emit(self):
73
- try:
74
- return await self.output_queue.get()
75
- except Exception as e:
76
- raise WebRTCError(f"Error emitting: {e}")
77
 
78
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
79
- try:
80
- if not self.client:
81
- return
82
- audio_float32 = audio_to_float32(frame)
83
- await self.client.send_audio(audio_float32) # type: ignore
84
- except Exception as e:
85
- raise WebRTCError(f"Error sending audio: {e}")
86
 
87
  async def shutdown(self):
88
  if self.client:
@@ -111,7 +101,9 @@ stream = Stream(
111
  ],
112
  additional_outputs=[state, chatbot],
113
  additional_outputs_handler=add_to_chatbot,
114
- ui_args={"title": "Phonic Chat (Powered by FastRTC ⚡️)"},
 
 
115
  rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
116
  concurrency_limit=5 if get_space() else None,
117
  time_limit=90 if get_space() else None,
@@ -120,9 +112,6 @@ stream = Stream(
120
  with stream.ui:
121
  state.change(lambda s: s, inputs=state, outputs=chatbot)
122
 
123
- app = FastAPI()
124
- stream.mount(app)
125
-
126
  if __name__ == "__main__":
127
  if (mode := os.getenv("MODE")) == "UI":
128
  stream.ui.launch(server_port=7860)
 
1
  import subprocess
2
 
3
+ subprocess.run(["pip", "install", "fastrtc==0.0.4.post1"])
4
 
5
  import asyncio
6
  import base64
 
15
  AsyncStreamHandler,
16
  Stream,
17
  get_twilio_turn_credentials,
 
18
  audio_to_float32,
19
+ wait_for_item,
20
  )
 
21
  from phonic.client import PhonicSTSClient, get_voices
22
 
23
  load_dotenv()
 
41
  async def start_up(self):
42
  await self.wait_for_args()
43
  voice_id = self.latest_args[1]
44
+ async with PhonicSTSClient(STS_URI, API_KEY) as client:
45
+ self.client = client
46
+ sts_stream = client.sts( # type: ignore
47
+ input_format="pcm_44100",
48
+ output_format="pcm_44100",
49
+ system_prompt="You are a helpful voice assistant. Respond conversationally.",
50
+ # welcome_message="Hello! I'm your voice assistant. How can I help you today?",
51
+ voice_id=voice_id,
52
+ )
53
+ async for message in sts_stream:
54
+ message_type = message.get("type")
55
+ if message_type == "audio_chunk":
56
+ audio_b64 = message["audio"]
57
+ audio_bytes = base64.b64decode(audio_b64)
58
+ await self.output_queue.put(
59
+ (SAMPLE_RATE, np.frombuffer(audio_bytes, dtype=np.int16))
60
+ )
61
+ if text := message.get("text"):
62
+ msg = {"role": "assistant", "content": text}
 
 
 
 
63
  await self.output_queue.put(AdditionalOutputs(msg))
64
+ elif message_type == "input_text":
65
+ msg = {"role": "user", "content": message["text"]}
66
+ await self.output_queue.put(AdditionalOutputs(msg))
67
 
68
  async def emit(self):
69
+ return await wait_for_item(self.output_queue)
 
 
 
70
 
71
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
72
+ if not self.client:
73
+ return
74
+ audio_float32 = audio_to_float32(frame)
75
+ await self.client.send_audio(audio_float32) # type: ignore
 
 
 
76
 
77
  async def shutdown(self):
78
  if self.client:
 
101
  ],
102
  additional_outputs=[state, chatbot],
103
  additional_outputs_handler=add_to_chatbot,
104
+ ui_args={
105
+ "title": "Phonic Chat (Powered by FastRTC ⚑️)",
106
+ },
107
  rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
108
  concurrency_limit=5 if get_space() else None,
109
  time_limit=90 if get_space() else None,
 
112
  with stream.ui:
113
  state.change(lambda s: s, inputs=state, outputs=chatbot)
114
 
 
 
 
115
  if __name__ == "__main__":
116
  if (mode := os.getenv("MODE")) == "UI":
117
  stream.ui.launch(server_port=7860)