Spaces:

SalexAI
/

cbx

Sleeping

App Files Files Community

SalexAI committed on May 10

Commit

60778ba

verified ·

1 Parent(s): e5956ee

Update app.py

Browse files

Files changed (1) hide show

app.py +47 -25

app.py CHANGED Viewed

@@ -1,34 +1,61 @@
 import os
 import httpx
-from fastrtc import ReplyOnPause, Stream, get_stt_model, get_tts_model, StreamHandlerBase
 from openai import OpenAI
-# Initialize Sambanova Client
 sambanova_client = OpenAI(
-    api_key=os.getenv("key"), base_url="https://api.deepinfra.com/v1"
 )
-# Load STT and TTS models
 stt_model = get_stt_model()
 tts_model = get_tts_model()
-# Create a proper handler subclass
-class EchoHandler(StreamHandlerBase):
     def __init__(self):
-        super().__init__()
-    def on_audio(self, audio):
-        prompt = stt_model.stt(audio)
         response = sambanova_client.chat.completions.create(
             model="mistralai/Mistral-Small-24B-Instruct-2501",
-            messages=[{"role": "user", "content": prompt}],
             max_tokens=200,
         )
         reply = response.choices[0].message.content
-        for audio_chunk in tts_model.stream_tts_sync(reply):
-            yield audio_chunk
-# Dummy TURN config
 def get_cloudflare_turn_credentials(
     turn_key_id=None,
     turn_key_api_token=None,
@@ -36,20 +63,15 @@ def get_cloudflare_turn_credentials(
     ttl=600,
     client: httpx.AsyncClient | None = None,
 ):
-    return {
-        "iceServers": [
-            {
-                "urls": ["stun:stun.l.google.com:19302"]
-            }
-        ]
-    }
-# Launch stream with correct handler
 stream = Stream(
     handler=EchoHandler(),
-    rtc_configuration=get_cloudflare_turn_credentials,
     modality="audio",
-    mode="send-receive"
 )
 stream.fastphone()

 import os
 import httpx
+import numpy as np
+from queue import Queue, Empty
+from fastrtc import Stream, StreamHandler, get_stt_model, get_tts_model
 from openai import OpenAI
+# Initialize the OpenAI-compatible chat client. Despite the variable name,
+# base_url points at api.deepinfra.com; the API key is read from the
+# environment variable named "key" (os.getenv returns None when it is unset).
 sambanova_client = OpenAI(
+    api_key=os.getenv("key"),
+    base_url="https://api.deepinfra.com/v1"
 )
+# Speech-to-text and text-to-speech models obtained from the fastrtc helpers.
 stt_model = get_stt_model()
 tts_model = get_tts_model()
+class EchoHandler(StreamHandler):
+    """Voice round-trip handler: speech in, chat-model reply spoken back out.
+    receive() transcribes incoming audio, sends the text to the chat model and
+    queues the synthesized reply; emit() hands the queued audio back one chunk
+    per call. copy() returns a fresh, independent handler.
+    """
     def __init__(self):
+        super().__init__()  # uses default sample rates/layouts
+        # Synthesized audio waiting to be sent, as (sample_rate, samples) tuples.
+        self.queue: Queue[tuple[int, np.ndarray]] = Queue()
+    def start_up(self) -> None:
+        """Hook for optional warm-up of models or state; currently a no-op."""
+    def receive(self, frame: tuple[int, np.ndarray]) -> None:
+        """Transcribe ``frame``, query the chat model, and queue the spoken reply.
+        ``frame`` is a ``(sample_rate, samples)`` tuple and is passed to the STT
+        model unchanged. Nothing is queued when the transcript or the model's
+        reply is empty.
+        """
+        # NOTE(review): STT, the chat completion and TTS all run synchronously
+        # on every call. If fastrtc invokes receive() once per short audio
+        # frame, pause detection (e.g. ReplyOnPause, which the previous
+        # revision imported) is probably needed - confirm against fastrtc docs.
+        # 1) Transcribe speech → text
+        text = stt_model.stt(frame)
+        if not text or not text.strip():
+            return  # nothing recognised: skip the chat and TTS round-trip
+        # 2) Chat completion
         response = sambanova_client.chat.completions.create(
             model="mistralai/Mistral-Small-24B-Instruct-2501",
+            messages=[{"role": "user", "content": text}],
             max_tokens=200,
         )
         reply = response.choices[0].message.content
+        if not reply:
+            return  # content can be None or empty; nothing to synthesize
+        # 3) Generate TTS chunks and enqueue them
+        for tts_chunk in tts_model.stream_tts_sync(reply):
+            # stream_tts_sync already yields (sample_rate, samples) tuples (the
+            # previous revision yielded them to the stream unchanged), so they
+            # are enqueued as-is. Re-wrapping them with the input frame's
+            # sample rate would nest the tuple and mislabel the output rate.
+            self.queue.put(tts_chunk)
+    def emit(self) -> tuple[int, np.ndarray] | None:
+        """Return the next queued audio chunk, or None when none is pending."""
+        try:
+            return self.queue.get_nowait()
+        except Empty:
+            return None  # no audio to send right now
+    def copy(self) -> "EchoHandler":
+        """Return a new handler with its own empty queue."""
+        return EchoHandler()
+    def shutdown(self) -> None:
+        """Hook for optional cleanup; currently a no-op."""
 def get_cloudflare_turn_credentials(
     turn_key_id=None,
     turn_key_api_token=None,
     ttl=600,
     client: httpx.AsyncClient | None = None,
 ):
+    """Return a static ICE-server configuration with one public STUN server.
+    This is a placeholder: none of the arguments shown here are read by the
+    body, and no TURN server or credentials are included in the result.
+    """
+    # Replace with your real TURN creds logic
+    return {"iceServers": [{"urls": ["stun:stun.l.google.com:19302"]}]}
+# Wire up the stream with the new handler
+# rtc_configuration receives the function object itself; it is not called here.
 stream = Stream(
     handler=EchoHandler(),
     modality="audio",
+    mode="send-receive",
+    rtc_configuration=get_cloudflare_turn_credentials
 )
+# Runs at import time: there is no `if __name__ == "__main__":` guard.
 stream.fastphone()