scooter7 committed on
Commit
59c59d4
·
verified ·
1 Parent(s): 697db26

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -21
app.py CHANGED
@@ -1,8 +1,7 @@
1
- import os
2
- os.environ["CUDA_VISIBLE_DEVICES"] = ""
3
  import asyncio
4
  import base64
5
  import json
 
6
  import pathlib
7
  from typing import AsyncGenerator, Literal, List
8
 
@@ -10,11 +9,11 @@ import numpy as np
10
  from dotenv import load_dotenv
11
  from fastapi import FastAPI
12
  from fastapi.responses import HTMLResponse
13
- from fastrtc import AsyncStreamHandler, Stream, get_twilio_turn_credentials, wait_for_item
14
  from pydantic import BaseModel
15
  import uvicorn
16
 
17
- # --- Import get_space to detect Hugging Face Spaces ---
18
  from gradio.utils import get_space
19
 
20
  # --- Document processing and RAG libraries ---
@@ -116,7 +115,7 @@ def generate_answer(query: str) -> str:
116
  # 2. Speech-to-Text and Text-to-Speech Functions
117
  # ====================================================
118
 
119
- # Load Whisper model for speech-to-text
120
  stt_model = whisper.load_model("base", device="cpu")
121
 
122
  def speech_to_text(audio_array: np.ndarray, sample_rate: int = 16000) -> str:
@@ -158,7 +157,6 @@ class RAGVoiceHandler(AsyncStreamHandler):
158
  self.last_input_time = asyncio.get_event_loop().time()
159
 
160
  def copy(self) -> "RAGVoiceHandler":
161
- # Return a new instance with the same configuration
162
  return RAGVoiceHandler(
163
  expected_layout="mono",
164
  output_sample_rate=self.output_sample_rate,
@@ -166,7 +164,6 @@ class RAGVoiceHandler(AsyncStreamHandler):
166
  )
167
 
168
  async def stream(self) -> AsyncGenerator[bytes, None]:
169
- # Continuously check for new audio; if a short silence occurs (timeout), process the buffered utterance.
170
  while not self.quit.is_set():
171
  try:
172
  audio_data = await asyncio.wait_for(self.input_queue.get(), timeout=0.5)
@@ -174,7 +171,6 @@ class RAGVoiceHandler(AsyncStreamHandler):
174
  self.last_input_time = asyncio.get_event_loop().time()
175
  except asyncio.TimeoutError:
176
  if self.input_buffer:
177
- # Process the buffered utterance
178
  audio_array = np.frombuffer(self.input_buffer, dtype=np.int16)
179
  self.input_buffer = bytearray()
180
  query_text = speech_to_text(audio_array, sample_rate=self.input_sample_rate)
@@ -187,7 +183,6 @@ class RAGVoiceHandler(AsyncStreamHandler):
187
  await asyncio.sleep(0.1)
188
 
189
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
190
- # Each received frame is added as bytes to the input queue
191
  sample_rate, audio_array = frame
192
  audio_bytes = audio_array.tobytes()
193
  await self.input_queue.put(audio_bytes)
@@ -202,13 +197,10 @@ class RAGVoiceHandler(AsyncStreamHandler):
202
  # 4. Voice Streaming Setup & FastAPI Endpoints
203
  # ====================================================
204
 
205
- # When running on Hugging Face Spaces, supply a dummy RTC configuration.
206
- if get_space():
207
- rtc_config = {"iceServers": [{"urls": "stun:stun.l.google.com:19302"}]}
208
- else:
209
- rtc_config = get_twilio_turn_credentials()
210
 
211
- # Create a Stream instance using our RAGVoiceHandler.
212
  stream = Stream(
213
  modality="audio",
214
  mode="send-receive",
@@ -218,7 +210,6 @@ stream = Stream(
218
  time_limit=90,
219
  )
220
 
221
- # Define a simple input hook (if needed by the client to initialize the call)
222
  class InputData(BaseModel):
223
  webrtc_id: str
224
 
@@ -230,13 +221,10 @@ async def input_hook(body: InputData):
230
  stream.set_input(body.webrtc_id)
231
  return {"status": "ok"}
232
 
233
- # Endpoint to handle WebRTC offer from the client (for voice calls)
234
  @app.post("/webrtc/offer")
235
  async def webrtc_offer(offer: dict):
236
- # This uses fastrtc's built-in handling of the offer to set up the connection.
237
  return await stream.handle_offer(offer)
238
 
239
- # Serve your existing HTML file (which contains your voice UI)
240
  @app.get("/")
241
  async def index():
242
  index_path = current_dir / "index.html"
@@ -250,14 +238,12 @@ async def index():
250
  if __name__ == "__main__":
251
  mode = os.getenv("MODE", "PHONE")
252
  if mode == "UI":
253
- # Optionally launch a text-based Gradio interface for testing the RAG backend
254
  import gradio as gr
255
  def gradio_chat(user_input):
256
  return generate_answer(user_input)
257
  iface = gr.Interface(fn=gradio_chat, inputs="text", outputs="text", title="Customer Support Chatbot")
258
  iface.launch(server_port=7860)
259
  elif mode == "PHONE":
260
- # Run the FastAPI app so that callers can use the voice functionality.
261
  uvicorn.run(app, host="0.0.0.0", port=7860)
262
  else:
263
  uvicorn.run(app, host="0.0.0.0", port=7860)
 
 
 
1
  import asyncio
2
  import base64
3
  import json
4
+ import os
5
  import pathlib
6
  from typing import AsyncGenerator, Literal, List
7
 
 
9
  from dotenv import load_dotenv
10
  from fastapi import FastAPI
11
  from fastapi.responses import HTMLResponse
12
+ from fastrtc import AsyncStreamHandler, Stream, wait_for_item
13
  from pydantic import BaseModel
14
  import uvicorn
15
 
16
+ # --- Import get_space to detect Hugging Face Spaces (optional) ---
17
  from gradio.utils import get_space
18
 
19
  # --- Document processing and RAG libraries ---
 
115
  # 2. Speech-to-Text and Text-to-Speech Functions
116
  # ====================================================
117
 
118
+ # Force Whisper to load on CPU explicitly
119
  stt_model = whisper.load_model("base", device="cpu")
120
 
121
  def speech_to_text(audio_array: np.ndarray, sample_rate: int = 16000) -> str:
 
157
  self.last_input_time = asyncio.get_event_loop().time()
158
 
159
  def copy(self) -> "RAGVoiceHandler":
 
160
  return RAGVoiceHandler(
161
  expected_layout="mono",
162
  output_sample_rate=self.output_sample_rate,
 
164
  )
165
 
166
  async def stream(self) -> AsyncGenerator[bytes, None]:
 
167
  while not self.quit.is_set():
168
  try:
169
  audio_data = await asyncio.wait_for(self.input_queue.get(), timeout=0.5)
 
171
  self.last_input_time = asyncio.get_event_loop().time()
172
  except asyncio.TimeoutError:
173
  if self.input_buffer:
 
174
  audio_array = np.frombuffer(self.input_buffer, dtype=np.int16)
175
  self.input_buffer = bytearray()
176
  query_text = speech_to_text(audio_array, sample_rate=self.input_sample_rate)
 
183
  await asyncio.sleep(0.1)
184
 
185
  async def receive(self, frame: tuple[int, np.ndarray]) -> None:
 
186
  sample_rate, audio_array = frame
187
  audio_bytes = audio_array.tobytes()
188
  await self.input_queue.put(audio_bytes)
 
197
  # 4. Voice Streaming Setup & FastAPI Endpoints
198
  # ====================================================
199
 
200
+ # For ZeroGPU spaces, supply a dummy RTC configuration.
201
+ # (This avoids calling get_twilio_turn_credentials() which depends on NVML.)
202
+ rtc_config = {"iceServers": [{"urls": "stun:stun.l.google.com:19302"}]}
 
 
203
 
 
204
  stream = Stream(
205
  modality="audio",
206
  mode="send-receive",
 
210
  time_limit=90,
211
  )
212
 
 
213
  class InputData(BaseModel):
214
  webrtc_id: str
215
 
 
221
  stream.set_input(body.webrtc_id)
222
  return {"status": "ok"}
223
 
 
224
  @app.post("/webrtc/offer")
225
  async def webrtc_offer(offer: dict):
 
226
  return await stream.handle_offer(offer)
227
 
 
228
  @app.get("/")
229
  async def index():
230
  index_path = current_dir / "index.html"
 
238
  if __name__ == "__main__":
239
  mode = os.getenv("MODE", "PHONE")
240
  if mode == "UI":
 
241
  import gradio as gr
242
  def gradio_chat(user_input):
243
  return generate_answer(user_input)
244
  iface = gr.Interface(fn=gradio_chat, inputs="text", outputs="text", title="Customer Support Chatbot")
245
  iface.launch(server_port=7860)
246
  elif mode == "PHONE":
 
247
  uvicorn.run(app, host="0.0.0.0", port=7860)
248
  else:
249
  uvicorn.run(app, host="0.0.0.0", port=7860)