Update app.py
Browse files
app.py
CHANGED
@@ -28,6 +28,11 @@ async def serve_homepage():
|
|
28 |
"""Serve the chat interface HTML."""
|
29 |
with open("static/index.html", "r") as f:
|
30 |
return Response(content=f.read(), media_type="text/html")
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
@app.post("/stt/")
|
33 |
async def speech_to_text(file: UploadFile = File(...)):
|
@@ -132,6 +137,91 @@ async def chat_with_llm(file: UploadFile = File(...)):
|
|
132 |
}
|
133 |
)
|
134 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
135 |
except Exception as e:
|
136 |
print(f"Error processing audio: {str(e)}")
|
137 |
return Response(
|
|
|
28 |
"""Serve the chat interface HTML."""
|
29 |
with open("static/index.html", "r") as f:
|
30 |
return Response(content=f.read(), media_type="text/html")
|
31 |
+
@app.get("/conv")
async def serve_homepage():
    """Serve the ``static/conv.html`` page for the ``/conv`` route.

    Returns:
        Response: the file's text content with media type ``text/html``.

    Raises:
        OSError: if ``static/conv.html`` cannot be opened.
    """
    # NOTE(review): the name ``serve_homepage`` also appears on the handler
    # that reads static/index.html in this file, so this definition rebinds
    # that name. It is left unchanged here to keep the module's names stable;
    # consider renaming this handler to something route-specific.
    #
    # The encoding is given explicitly: without it, open() falls back to the
    # platform locale encoding, which can mis-decode a UTF-8 HTML file.
    with open("static/conv.html", "r", encoding="utf-8") as f:
        return Response(content=f.read(), media_type="text/html")
|
36 |
|
37 |
@app.post("/stt/")
|
38 |
async def speech_to_text(file: UploadFile = File(...)):
|
|
|
137 |
}
|
138 |
)
|
139 |
|
140 |
+
except Exception as e:
|
141 |
+
print(f"Error processing audio: {str(e)}")
|
142 |
+
return Response(
|
143 |
+
content=f"Error processing audio: {str(e)}",
|
144 |
+
media_type="text/plain",
|
145 |
+
status_code=500
|
146 |
+
)
|
147 |
+
@app.post("/continuous-chat/")
|
148 |
+
async def continuous_chat(
|
149 |
+
file: UploadFile = File(...),
|
150 |
+
chat_history: Optional[str] = Form(None)
|
151 |
+
):
|
152 |
+
"""Process input WAV with chat history, send text to LLM, and return response as WAV."""
|
153 |
+
# Initialize ggwave instance
|
154 |
+
instance = ggwave.init()
|
155 |
+
|
156 |
+
# Parse chat history if provided
|
157 |
+
messages = [{"role": "system", "content": "you are a helpful assistant. answer always in one sentence"}]
|
158 |
+
|
159 |
+
if chat_history:
|
160 |
+
try:
|
161 |
+
history = json.loads(chat_history)
|
162 |
+
for msg in history:
|
163 |
+
if msg["role"] in ["user", "assistant"]:
|
164 |
+
messages.append(msg)
|
165 |
+
except Exception as e:
|
166 |
+
print(f"Error parsing chat history: {str(e)}")
|
167 |
+
|
168 |
+
# Read the file content into memory
|
169 |
+
file_content = await file.read()
|
170 |
+
|
171 |
+
# Process the audio file
|
172 |
+
with io.BytesIO(file_content) as buffer:
|
173 |
+
try:
|
174 |
+
fs, recorded_waveform = wav.read(buffer)
|
175 |
+
recorded_waveform = recorded_waveform.astype(np.float32) / 32767.0
|
176 |
+
waveform_bytes = recorded_waveform.tobytes()
|
177 |
+
user_message = ggwave.decode(instance, waveform_bytes)
|
178 |
+
|
179 |
+
if user_message is None:
|
180 |
+
return Response(
|
181 |
+
content="No message detected in audio",
|
182 |
+
media_type="text/plain",
|
183 |
+
status_code=400
|
184 |
+
)
|
185 |
+
|
186 |
+
decoded_message = user_message.decode("utf-8")
|
187 |
+
print("user_message: " + decoded_message)
|
188 |
+
|
189 |
+
# Add user message to messages
|
190 |
+
messages.append({"role": "user", "content": decoded_message})
|
191 |
+
|
192 |
+
# Send to LLM with full chat history
|
193 |
+
chat_completion = client.chat.completions.create(
|
194 |
+
messages=messages,
|
195 |
+
model="llama-3.3-70b-versatile",
|
196 |
+
)
|
197 |
+
|
198 |
+
llm_response = chat_completion.choices[0].message.content
|
199 |
+
print(llm_response)
|
200 |
+
|
201 |
+
# Convert response to audio
|
202 |
+
encoded_waveform = ggwave.encode(llm_response, protocolId=1, volume=100)
|
203 |
+
waveform_float32 = np.frombuffer(encoded_waveform, dtype=np.float32)
|
204 |
+
waveform_int16 = np.int16(waveform_float32 * 32767)
|
205 |
+
|
206 |
+
# Save to buffer
|
207 |
+
buffer = io.BytesIO()
|
208 |
+
with wave.open(buffer, "wb") as wf:
|
209 |
+
wf.setnchannels(1)
|
210 |
+
wf.setsampwidth(2)
|
211 |
+
wf.setframerate(48000)
|
212 |
+
wf.writeframes(waveform_int16.tobytes())
|
213 |
+
|
214 |
+
buffer.seek(0)
|
215 |
+
|
216 |
+
return Response(
|
217 |
+
content=buffer.getvalue(),
|
218 |
+
media_type="audio/wav",
|
219 |
+
headers={
|
220 |
+
"X-User-Message": decoded_message,
|
221 |
+
"X-LLM-Response": llm_response
|
222 |
+
}
|
223 |
+
)
|
224 |
+
|
225 |
except Exception as e:
|
226 |
print(f"Error processing audio: {str(e)}")
|
227 |
return Response(
|