Update app.py
app.py
CHANGED
@@ -6,7 +6,8 @@ from fastrtc import (
     WebRTC,
     WebRTCData,
     WebRTCError,
-
+    get_current_context,
+    get_hf_turn_credentials,
     get_stt_model,
 )
 from gradio.utils import get_space
@@ -20,13 +21,13 @@ conversations = {}
 def response(
     data: WebRTCData,
     conversation: list[dict],
-    token: str | None = None,
     model: str = "meta-llama/Llama-3.2-3B-Instruct",
     provider: str = "sambanova",
 ):
+    context = get_current_context()
     print("conversation before", conversation)
-    if not provider.startswith("http") and not
-        raise WebRTCError("Please
+    if not provider.startswith("http") and not context.oauth_token:
+        raise WebRTCError("Please Sign in to use this demo.")

     if data.audio is not None and data.audio[1].size > 0:
         user_audio_text = stt_model.stt(data.audio)
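Note: the sketch below restates the new sign-in gate as a standalone helper, using only names visible in this diff (get_current_context, WebRTCError, oauth_token.access_token); require_oauth is a hypothetical name for illustration, not part of app.py.

from fastrtc import WebRTCError, get_current_context

def require_oauth(provider: str) -> str | None:
    # Hypothetical helper: return the signed-in user's HF access token.
    # URL providers (e.g. a local Ollama server) bypass the sign-in check.
    if provider.startswith("http"):
        return None
    context = get_current_context()
    if not context.oauth_token:
        raise WebRTCError("Please Sign in to use this demo.")
    return context.oauth_token.access_token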
@@ -40,7 +41,7 @@ def response(
         client = OpenAI(base_url=provider, api_key="ollama")
     else:
         client = huggingface_hub.InferenceClient(
-            api_key=
+            api_key=context.oauth_token.access_token,  # type: ignore
             provider=provider,  # type: ignore
         )

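For context, the client selection around this hunk reduces to the branch below; a minimal sketch assuming the OpenAI and huggingface_hub imports already used elsewhere in app.py, with make_client as a hypothetical name.

import huggingface_hub
from openai import OpenAI
from fastrtc import get_current_context

def make_client(provider: str):
    # A URL provider is treated as an OpenAI-compatible server; anything
    # else is routed through HF inference providers with the OAuth token
    # taken from the current WebRTC context.
    if provider.startswith("http"):
        return OpenAI(base_url=provider, api_key="ollama")
    context = get_current_context()
    return huggingface_hub.InferenceClient(
        api_key=context.oauth_token.access_token,  # type: ignore
        provider=provider,  # type: ignore
    )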
@@ -95,10 +96,6 @@ with gr.Blocks(css=css) as demo:
         """
     )
     with gr.Sidebar():
-        login = gr.LoginButton()
-        token = gr.Textbox(
-            placeholder="Place your HF token here", type="password", label="HF Token"
-        )
         model = gr.Dropdown(
             choices=["meta-llama/Llama-3.2-3B-Instruct"],
             allow_custom_value=True,
@@ -107,21 +104,23 @@ with gr.Blocks(css=css) as demo:
         provider = gr.Dropdown(
             label="Provider",
             choices=providers,
-            value="
+            value="auto",
             info="Select a hf-compatible provider or type the url of your server, e.g. http://127.0.0.1:11434/v1 for ollama",
             allow_custom_value=True,
         )
-        provider.change(hide_token, inputs=[provider], outputs=[token])
         cb = gr.Chatbot(type="messages", height=600)
         webrtc = WebRTC(
             modality="audio",
             mode="send",
             variant="textbox",
-            rtc_configuration=
+            rtc_configuration=get_hf_turn_credentials if get_space() else None,
+            server_rtc_configuration=get_hf_turn_credentials(ttl=3_600 * 24 * 30)
+            if get_space()
+            else None,
         )
         webrtc.stream(
             ReplyOnPause(response),  # type: ignore
-            inputs=[webrtc, cb,
+            inputs=[webrtc, cb, model, provider],
             outputs=[cb],
             concurrency_limit=100,
         )
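The TURN wiring above uses get_hf_turn_credentials in two different ways: passed uncalled to rtc_configuration, so credentials are fetched per client connection, and called once with a 30-day ttl for server_rtc_configuration. A minimal sketch of that pattern in isolation, assuming the fastrtc API exactly as shown in this diff:

from fastrtc import WebRTC, get_hf_turn_credentials
from gradio.utils import get_space

webrtc = WebRTC(
    modality="audio",
    mode="send",
    variant="textbox",
    # Uncalled: fastrtc invokes it for each connection when on Spaces.
    rtc_configuration=get_hf_turn_credentials if get_space() else None,
    # Called once at startup; the credentials stay valid for 30 days.
    server_rtc_configuration=get_hf_turn_credentials(ttl=3_600 * 24 * 30)
    if get_space()
    else None,
)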
@@ -130,4 +129,4 @@ with gr.Blocks(css=css) as demo:
     )

 if __name__ == "__main__":
-    demo.launch(server_port=7860)
+    demo.launch(server_port=7860)