freddyaboulton HF Staff committed on
Commit
40cff94
Β·
verified Β·
1 Parent(s): 0ebff01

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +10 -6
  2. app.py +123 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,12 +1,16 @@
1
  ---
2
- title: Phonic Chat
3
- emoji: πŸ’»
4
- colorFrom: pink
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 5.17.1
8
  app_file: app.py
9
  pinned: false
 
 
 
 
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Phonic AI Chat
3
+ emoji: πŸŽ™οΈ
4
+ colorFrom: purple
5
+ colorTo: red
6
  sdk: gradio
7
+ sdk_version: 5.16.0
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ short_description: Talk to Phonic AI's speech-to-speech model
12
+ tags: [webrtc, websocket, gradio, secret|TWILIO_ACCOUNT_SID, secret|TWILIO_AUTH_TOKEN, secret|PHONIC_API_KEY]
13
+ python_version: 3.11
14
  ---
15
 
16
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import base64
3
+ import os
4
+
5
+ import gradio as gr
6
+ from gradio.utils import get_space
7
+ import numpy as np
8
+ from dotenv import load_dotenv
9
+ from fastrtc import (
10
+ AdditionalOutputs,
11
+ AsyncStreamHandler,
12
+ Stream,
13
+ get_twilio_turn_credentials,
14
+ WebRTCError,
15
+ audio_to_float32,
16
+ )
17
+ from fastapi import FastAPI
18
+ from phonic.client import PhonicSTSClient, get_voices
19
+
20
+ load_dotenv()
21
+
22
+ STS_URI = "wss://api.phonic.co/v1/sts/ws"
23
+ API_KEY = os.environ["PHONIC_API_KEY"]
24
+ SAMPLE_RATE = 44_100
25
+ voices = get_voices(API_KEY)
26
+ voice_ids = [voice["id"] for voice in voices]
27
+
28
+
29
+ class PhonicHandler(AsyncStreamHandler):
30
+ def __init__(self):
31
+ super().__init__(input_sample_rate=SAMPLE_RATE, output_sample_rate=SAMPLE_RATE)
32
+ self.output_queue = asyncio.Queue()
33
+ self.client = None
34
+
35
+ def copy(self) -> AsyncStreamHandler:
36
+ return PhonicHandler()
37
+
38
+ async def start_up(self):
39
+ await self.wait_for_args()
40
+ voice_id = self.latest_args[1]
41
+ try:
42
+ async with PhonicSTSClient(STS_URI, API_KEY) as client:
43
+ self.client = client
44
+ sts_stream = client.sts( # type: ignore
45
+ input_format="pcm_44100",
46
+ output_format="pcm_44100",
47
+ system_prompt="You are a helpful voice assistant. Respond conversationally.",
48
+ # welcome_message="Hello! I'm your voice assistant. How can I help you today?",
49
+ voice_id=voice_id,
50
+ )
51
+ async for message in sts_stream:
52
+ message_type = message.get("type")
53
+ if message_type == "audio_chunk":
54
+ audio_b64 = message["audio"]
55
+ audio_bytes = base64.b64decode(audio_b64)
56
+ await self.output_queue.put(
57
+ (SAMPLE_RATE, np.frombuffer(audio_bytes, dtype=np.int16))
58
+ )
59
+ if text := message.get("text"):
60
+ msg = {"role": "assistant", "content": text}
61
+ await self.output_queue.put(AdditionalOutputs(msg))
62
+ elif message_type == "input_text":
63
+ msg = {"role": "user", "content": message["text"]}
64
+ await self.output_queue.put(AdditionalOutputs(msg))
65
+ except Exception as e:
66
+ raise WebRTCError(f"Error starting up: {e}")
67
+
68
+ async def emit(self):
69
+ try:
70
+ return await self.output_queue.get()
71
+ except Exception as e:
72
+ raise WebRTCError(f"Error emitting: {e}")
73
+
74
+ async def receive(self, frame: tuple[int, np.ndarray]) -> None:
75
+ try:
76
+ if not self.client:
77
+ return
78
+ audio_float32 = audio_to_float32(frame)
79
+ await self.client.send_audio(audio_float32) # type: ignore
80
+ except Exception as e:
81
+ raise WebRTCError(f"Error sending audio: {e}")
82
+
83
+
84
+ def add_to_chatbot(state, chatbot, message):
85
+ state.append(message)
86
+ return state, gr.skip()
87
+
88
+
89
+ state = gr.State(value=[])
90
+ chatbot = gr.Chatbot(type="messages", value=[])
91
+ stream = Stream(
92
+ handler=PhonicHandler(),
93
+ mode="send-receive",
94
+ modality="audio",
95
+ additional_inputs=[
96
+ gr.Dropdown(
97
+ choices=voice_ids,
98
+ value="katherine",
99
+ label="Voice",
100
+ info="Select a voice from the dropdown",
101
+ )
102
+ ],
103
+ additional_outputs=[state, chatbot],
104
+ additional_outputs_handler=add_to_chatbot,
105
+ ui_args={"title": "Phonic Chat (Powered by FastRTC ⚑️)"},
106
+ rtc_configuration=get_twilio_turn_credentials() if get_space() else None,
107
+ concurrency_limit=5 if get_space() else None,
108
+ time_limit=90 if get_space() else None,
109
+ )
110
+
111
+ with stream.ui:
112
+ state.change(lambda s: s, inputs=state, outputs=chatbot)
113
+
114
+ app = FastAPI()
115
+ stream.mount(app)
116
+
117
+ if __name__ == "__main__":
118
+ if (mode := os.getenv("MODE")) == "UI":
119
+ stream.ui.launch(server_port=7860)
120
+ elif mode == "PHONE":
121
+ stream.fastphone(host="0.0.0.0", port=7860)
122
+ else:
123
+ stream.ui.launch(server_port=7860)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastrtc
2
+ python-dotenv
3
+ phonic-python
4
+ twilio