yasserrmd committed on
Commit
5f40ba9
·
verified ·
1 Parent(s): 4fb332f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -68
app.py CHANGED
@@ -75,77 +75,95 @@ def text_to_speech(input_text: TextInput):
75
@app.post("/chat/")
async def chat_with_llm(file: UploadFile = File(...)):
    """Process input WAV, send text to LLM, and return generated response as WAV.

    The uploaded WAV is decoded with ggwave to recover a text message, the text
    is sent to the chat model, and the model's reply is re-encoded with ggwave
    into a mono 16-bit PCM WAV returned in the response body. The decoded user
    message and the LLM reply are echoed back in response headers.

    Returns:
        200 audio/wav on success; 400 text/plain when no ggwave message is
        found in the audio; 500 text/plain on any processing error.
    """
    # Initialize ggwave instance (must be freed on every exit path).
    instance = ggwave.init()

    # Read the file content into memory without saving to disk.
    file_content = await file.read()

    # Wrap the upload bytes so scipy can parse the WAV container.
    with io.BytesIO(file_content) as buffer:
        try:
            fs, recorded_waveform = wav.read(buffer)
            # ggwave expects float32 samples; scale from 16-bit PCM range.
            # NOTE(review): assumes the input WAV is int16 PCM — confirm.
            recorded_waveform = recorded_waveform.astype(np.float32) / 32767.0
            user_message = ggwave.decode(instance, recorded_waveform.tobytes())

            if user_message is None:
                return Response(
                    content="No message detected in audio",
                    media_type="text/plain",
                    status_code=400
                )

            print("user_message: " + user_message.decode("utf-8"))

            # Send to LLM.
            chat_completion = client.chat.completions.create(
                messages=[
                    {"role": "system", "content": "you are a helpful assistant. answer always in one sentence"},
                    {"role": "user", "content": user_message.decode("utf-8")}
                ],
                model="llama-3.3-70b-versatile",
            )

            llm_response = chat_completion.choices[0].message.content
            print(llm_response)

            # Convert response to audio: ggwave returns float32 sample bytes.
            encoded_waveform = ggwave.encode(llm_response, protocolId=1, volume=100)
            waveform_float32 = np.frombuffer(encoded_waveform, dtype=np.float32)

            # Scale float32 [-1, 1] samples to the int16 range for PCM output.
            waveform_int16 = np.int16(waveform_float32 * 32767)

            # Write the WAV container to an in-memory buffer (distinct name so
            # the input `buffer` from the enclosing `with` is not shadowed).
            out_buffer = io.BytesIO()
            with wave.open(out_buffer, "wb") as wf:
                wf.setnchannels(1)       # mono audio
                wf.setsampwidth(2)       # 2 bytes per sample (16-bit PCM)
                wf.setframerate(48000)   # sample rate used by ggwave output — TODO confirm
                wf.writeframes(waveform_int16.tobytes())

            out_buffer.seek(0)
            return Response(
                content=out_buffer.getvalue(),
                media_type="audio/wav",
                headers={
                    # NOTE(review): HTTP header values must be latin-1
                    # encodable; non-ASCII LLM output could fail here — confirm.
                    "X-User-Message": user_message.decode("utf-8"),
                    "X-LLM-Response": llm_response
                }
            )

        except Exception as e:
            print(f"Error processing audio: {str(e)}")
            return Response(
                content=f"Error processing audio: {str(e)}",
                media_type="text/plain",
                status_code=500
            )
        finally:
            # Fix: release the ggwave instance on every path — the original
            # leaked it on the 400 "No message detected" early return.
            ggwave.free(instance)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
  @app.post("/continuous-chat/")
150
  async def continuous_chat(
151
  file: UploadFile = File(...),
 
75
@app.post("/chat/")
async def chat_with_llm(file: UploadFile = File(...)):
    """Process input WAV, send text to LLM, and return generated response as WAV.

    The uploaded WAV is decoded with ggwave to recover a text message, the text
    is sent to the chat model, and the model's reply is re-encoded with ggwave
    into a mono 16-bit PCM WAV returned in the response body. The decoded user
    message and the LLM reply are echoed back in response headers.

    Returns:
        200 audio/wav on success; 400 text/plain for an empty upload or when
        no ggwave message is found; 500 text/plain on any processing error.
    """
    try:
        # Log file details for diagnosing upload problems.
        print(f"File received: {file.filename}, Content-Type: {file.content_type}")

        # Read the file content into memory.
        file_content = await file.read()
        if not file_content:
            return Response(
                content="Empty file uploaded",
                media_type="text/plain",
                status_code=400
            )

        # Initialize ggwave instance (must be freed on every exit path).
        instance = ggwave.init()

        # Wrap the upload bytes so scipy can parse the WAV container.
        with io.BytesIO(file_content) as buffer:
            try:
                fs, recorded_waveform = wav.read(buffer)
                # ggwave expects float32 samples; scale from 16-bit PCM range.
                # NOTE(review): assumes the input WAV is int16 PCM — confirm.
                recorded_waveform = recorded_waveform.astype(np.float32) / 32767.0
                user_message = ggwave.decode(instance, recorded_waveform.tobytes())

                if user_message is None:
                    return Response(
                        content="No message detected in audio",
                        media_type="text/plain",
                        status_code=400
                    )

                print("Decoded user message:", user_message.decode("utf-8"))

                # Send to LLM.
                chat_completion = client.chat.completions.create(
                    messages=[
                        {"role": "system", "content": "you are a helpful assistant. answer always in one sentence"},
                        {"role": "user", "content": user_message.decode("utf-8")}
                    ],
                    model="llama-3.3-70b-versatile",
                )

                llm_response = chat_completion.choices[0].message.content
                print("LLM Response:", llm_response)

                # Convert response to audio: ggwave returns float32 sample bytes.
                encoded_waveform = ggwave.encode(llm_response, protocolId=1, volume=100)
                waveform_float32 = np.frombuffer(encoded_waveform, dtype=np.float32)

                # Scale float32 [-1, 1] samples to the int16 range for PCM output.
                waveform_int16 = np.int16(waveform_float32 * 32767)

                # Write the WAV container to an in-memory buffer (distinct name
                # so the input `buffer` from the enclosing `with` is not shadowed).
                out_buffer = io.BytesIO()
                with wave.open(out_buffer, "wb") as wf:
                    wf.setnchannels(1)       # mono audio
                    wf.setsampwidth(2)       # 2 bytes per sample (16-bit PCM)
                    wf.setframerate(48000)   # sample rate used by ggwave output — TODO confirm
                    wf.writeframes(waveform_int16.tobytes())

                out_buffer.seek(0)
                return Response(
                    content=out_buffer.getvalue(),
                    media_type="audio/wav",
                    headers={
                        # NOTE(review): HTTP header values must be latin-1
                        # encodable; non-ASCII LLM output could fail here — confirm.
                        "X-User-Message": user_message.decode("utf-8"),
                        "X-LLM-Response": llm_response
                    }
                )

            except Exception as e:
                print(f"Error processing audio: {str(e)}")
                return Response(
                    content=f"Error processing audio: {str(e)}",
                    media_type="text/plain",
                    status_code=500
                )
            finally:
                # Fix: release the ggwave instance on every path — the original
                # leaked it on the 400 "No message detected" early return.
                ggwave.free(instance)

    except Exception as e:
        print(f"Unexpected error: {str(e)}")
        return Response(
            content=f"Unexpected error: {str(e)}",
            media_type="text/plain",
            status_code=500
        )
167
  @app.post("/continuous-chat/")
168
  async def continuous_chat(
169
  file: UploadFile = File(...),