Spaces:

yasserrmd
/

ggwave

Sleeping

App Files Files Community

yasserrmd committed on Feb 25

Commit

3b4968e

verified ·

1 Parent(s): 2fb2e7b

Create app.py

Browse files

Files changed (1) hide show

app.py +86 -0

app.py ADDED Viewed

	@@ -0,0 +1,86 @@

+from fastapi import FastAPI, UploadFile, File, Response, Request
+from fastapi.staticfiles import StaticFiles
+import ggwave
+import scipy.io.wavfile as wav
+import numpy as np
+import os
+from pydantic import BaseModel
+from groq import Groq
+import io
+app = FastAPI()
+# Serve the files in the local "static" directory under the /static URL prefix
+app.mount("/static", StaticFiles(directory="static"), name="static")
+# Initialize the module-level ggwave instance that the endpoints below pass to their ggwave calls
+instance = ggwave.init()
+# Initialize the Groq client; the API key is read from the GROQ_API_KEY environment variable
+client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+class TextInput(BaseModel):  # Request body model accepted by the POST /tts/ endpoint
+    text: str  # The text that /tts/ encodes into audio
+@app.get("/")
+async def serve_homepage():
+    """Serve the chat interface HTML."""
+    with open("static/index.html", "r") as f:
+        return Response(content=f.read(), media_type="text/html")
+@app.post("/stt/")
+async def speech_to_text(file: UploadFile = File(...)):
+    """Convert WAV audio file to text using ggwave."""
+    with open("temp.wav", "wb") as audio_file:
+        audio_file.write(await file.read())
+    # Load WAV file
+    fs, recorded_waveform = wav.read("temp.wav")
+    os.remove("temp.wav")
+    # Convert to bytes and decode
+    waveform_bytes = recorded_waveform.astype(np.uint8).tobytes()
+    decoded_message = ggwave.decode(instance, waveform_bytes)
+    return {"text": decoded_message}
+@app.post("/tts/")
+def text_to_speech(input_text: TextInput):
+    """Convert text to a WAV audio file using ggwave and return as response."""
+    encoded_waveform = ggwave.encode(instance, input_text.text)
+    buffer = io.BytesIO()
+    wav.write(buffer, 44100, np.frombuffer(encoded_waveform, dtype=np.uint8))
+    buffer.seek(0)
+    return Response(content=buffer.getvalue(), media_type="audio/wav")
+@app.post("/chat/")
+async def chat_with_llm(file: UploadFile = File(...)):
+    """Process input WAV, send text to LLM, and return generated response as WAV."""
+    with open("input_chat.wav", "wb") as audio_file:
+        audio_file.write(await file.read())
+    # Load WAV file
+    fs, recorded_waveform = wav.read("input_chat.wav")
+    os.remove("input_chat.wav")
+    # Convert to bytes and decode
+    waveform_bytes = recorded_waveform.astype(np.uint8).tobytes()
+    user_message = ggwave.decode(instance, waveform_bytes)
+    # Send to LLM
+    chat_completion = client.chat.completions.create(
+        messages=[{"role": "user", "content": user_message}],
+        model="llama-3.3-70b-versatile",
+    )
+    llm_response = chat_completion.choices[0].message.content
+    # Convert response to audio
+    response_waveform = ggwave.encode(instance, llm_response)
+    buffer = io.BytesIO()
+    wav.write(buffer, 44100, np.frombuffer(response_waveform, dtype=np.uint8))
+    buffer.seek(0)
+    return Response(content=buffer.getvalue(), media_type="audio/wav", headers={
+        "X-User-Message": user_message,
+        "X-LLM-Response": llm_response
+    })