yasserrmd committed on
Commit
39d90db
·
verified ·
1 Parent(s): dad5570

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +90 -0
app.py CHANGED
@@ -28,6 +28,11 @@ async def serve_homepage():
28
  """Serve the chat interface HTML."""
29
  with open("static/index.html", "r") as f:
30
  return Response(content=f.read(), media_type="text/html")
 
 
 
 
 
31
 
32
  @app.post("/stt/")
33
  async def speech_to_text(file: UploadFile = File(...)):
@@ -132,6 +137,91 @@ async def chat_with_llm(file: UploadFile = File(...)):
132
  }
133
  )
134
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
135
  except Exception as e:
136
  print(f"Error processing audio: {str(e)}")
137
  return Response(
 
28
  """Serve the chat interface HTML."""
29
  with open("static/index.html", "r") as f:
30
  return Response(content=f.read(), media_type="text/html")
31
@app.get("/conv")
async def serve_homepage():
    """Serve the conversation interface HTML from ``static/conv.html``.

    Returns:
        Response: the file's contents with media type ``text/html``.

    Raises:
        OSError: if ``static/conv.html`` cannot be opened.
    """
    # NOTE(review): this handler reuses the name of the earlier
    # ``serve_homepage`` definition, so the module-level name is rebound
    # here. Consider renaming it once no other code refers to that name.
    # Decode explicitly so the result does not depend on the host's locale
    # default encoding (assumes the HTML file is UTF-8 — confirm).
    with open("static/conv.html", "r", encoding="utf-8") as f:
        return Response(content=f.read(), media_type="text/html")
36
 
37
  @app.post("/stt/")
38
  async def speech_to_text(file: UploadFile = File(...)):
 
137
  }
138
  )
139
 
140
+ except Exception as e:
141
+ print(f"Error processing audio: {str(e)}")
142
+ return Response(
143
+ content=f"Error processing audio: {str(e)}",
144
+ media_type="text/plain",
145
+ status_code=500
146
+ )
147
+ @app.post("/continuous-chat/")
148
+ async def continuous_chat(
149
+ file: UploadFile = File(...),
150
+ chat_history: Optional[str] = Form(None)
151
+ ):
152
+ """Process input WAV with chat history, send text to LLM, and return response as WAV."""
153
+ # Initialize ggwave instance
154
+ instance = ggwave.init()
155
+
156
+ # Parse chat history if provided
157
+ messages = [{"role": "system", "content": "you are a helpful assistant. answer always in one sentence"}]
158
+
159
+ if chat_history:
160
+ try:
161
+ history = json.loads(chat_history)
162
+ for msg in history:
163
+ if msg["role"] in ["user", "assistant"]:
164
+ messages.append(msg)
165
+ except Exception as e:
166
+ print(f"Error parsing chat history: {str(e)}")
167
+
168
+ # Read the file content into memory
169
+ file_content = await file.read()
170
+
171
+ # Process the audio file
172
+ with io.BytesIO(file_content) as buffer:
173
+ try:
174
+ fs, recorded_waveform = wav.read(buffer)
175
+ recorded_waveform = recorded_waveform.astype(np.float32) / 32767.0
176
+ waveform_bytes = recorded_waveform.tobytes()
177
+ user_message = ggwave.decode(instance, waveform_bytes)
178
+
179
+ if user_message is None:
180
+ return Response(
181
+ content="No message detected in audio",
182
+ media_type="text/plain",
183
+ status_code=400
184
+ )
185
+
186
+ decoded_message = user_message.decode("utf-8")
187
+ print("user_message: " + decoded_message)
188
+
189
+ # Add user message to messages
190
+ messages.append({"role": "user", "content": decoded_message})
191
+
192
+ # Send to LLM with full chat history
193
+ chat_completion = client.chat.completions.create(
194
+ messages=messages,
195
+ model="llama-3.3-70b-versatile",
196
+ )
197
+
198
+ llm_response = chat_completion.choices[0].message.content
199
+ print(llm_response)
200
+
201
+ # Convert response to audio
202
+ encoded_waveform = ggwave.encode(llm_response, protocolId=1, volume=100)
203
+ waveform_float32 = np.frombuffer(encoded_waveform, dtype=np.float32)
204
+ waveform_int16 = np.int16(waveform_float32 * 32767)
205
+
206
+ # Save to buffer
207
+ buffer = io.BytesIO()
208
+ with wave.open(buffer, "wb") as wf:
209
+ wf.setnchannels(1)
210
+ wf.setsampwidth(2)
211
+ wf.setframerate(48000)
212
+ wf.writeframes(waveform_int16.tobytes())
213
+
214
+ buffer.seek(0)
215
+
216
+ return Response(
217
+ content=buffer.getvalue(),
218
+ media_type="audio/wav",
219
+ headers={
220
+ "X-User-Message": decoded_message,
221
+ "X-LLM-Response": llm_response
222
+ }
223
+ )
224
+
225
  except Exception as e:
226
  print(f"Error processing audio: {str(e)}")
227
  return Response(