yasserrmd committed on
Commit
09351a6
·
verified ·
1 Parent(s): ef4ce16

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +66 -44
app.py CHANGED
@@ -70,49 +70,71 @@ def text_to_speech(input_text: TextInput):
70
  @app.post("/chat/")
71
  async def chat_with_llm(file: UploadFile = File(...)):
72
  """Process input WAV, send text to LLM, and return generated response as WAV."""
73
- with open("input_chat.wav", "wb") as audio_file:
74
- audio_file.write(await file.read())
75
-
76
- # Load WAV file
77
- fs, recorded_waveform = wav.read("input_chat.wav")
78
- os.remove("input_chat.wav")
79
- recorded_waveform = recorded_waveform.astype(np.float32) / 32767.0
80
- waveform_bytes = recorded_waveform.tobytes()
81
- user_message = ggwave.decode(instance, waveform_bytes)
82
- print("user_message" + user_message.decode("utf-8") )
83
- # Send to LLM
84
- chat_completion = client.chat.completions.create(
85
- messages=[
86
- {
87
- "role": "system",
88
- "content": "you are a helpful assistant. answer alway in one sentence"
89
- },
90
- {"role": "user", "content": user_message.decode("utf-8")}],
91
- model="llama-3.3-70b-versatile",
92
- )
93
- llm_response = chat_completion.choices[0].message.content
94
- print(llm_response)
95
- # Convert response to audio
96
- """Convert text to a WAV audio file using ggwave and return as response."""
97
- encoded_waveform = ggwave.encode(llm_response , protocolId=1, volume=100)
98
-
99
- # Convert byte data into float32 array
100
- waveform_float32 = np.frombuffer(encoded_waveform, dtype=np.float32)
101
-
102
- # Normalize float32 data to the range of int16
103
- waveform_int16 = np.int16(waveform_float32 * 32767)
104
 
105
- # Save to buffer instead of a file
106
- buffer = io.BytesIO()
107
- with wave.open(buffer, "wb") as wf:
108
- wf.setnchannels(1) # Mono audio
109
- wf.setsampwidth(2) # 2 bytes per sample (16-bit PCM)
110
- wf.setframerate(48000) # Sample rate
111
- wf.writeframes(waveform_int16.tobytes()) # Write waveform as bytes
112
-
113
- buffer.seek(0)
114
 
115
- return Response(content=buffer.getvalue(), media_type="audio/wav", headers={
116
- "X-User-Message": user_message.decode("utf-8"),
117
- "X-LLM-Response": llm_response
118
- })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  @app.post("/chat/")
71
  async def chat_with_llm(file: UploadFile = File(...)):
72
  """Process input WAV, send text to LLM, and return generated response as WAV."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
+ # Read the file content into memory without saving to disk
75
+ file_content = await file.read()
 
 
 
 
 
 
 
76
 
77
+ # Create a BytesIO object to use with wav.read
78
+ with io.BytesIO(file_content) as buffer:
79
+ try:
80
+ fs, recorded_waveform = wav.read(buffer)
81
+ recorded_waveform = recorded_waveform.astype(np.float32) / 32767.0
82
+ waveform_bytes = recorded_waveform.tobytes()
83
+ user_message = ggwave.decode(instance, waveform_bytes)
84
+
85
+ if user_message is None:
86
+ return Response(
87
+ content="No message detected in audio",
88
+ media_type="text/plain",
89
+ status_code=400
90
+ )
91
+
92
+ print("user_message: " + user_message.decode("utf-8"))
93
+
94
+ # Send to LLM
95
+ chat_completion = client.chat.completions.create(
96
+ messages=[
97
+ {"role": "system", "content": "you are a helpful assistant. answer always in one sentence"},
98
+ {"role": "user", "content": user_message.decode("utf-8")}
99
+ ],
100
+ model="llama-3.3-70b-versatile",
101
+ )
102
+
103
+ llm_response = chat_completion.choices[0].message.content
104
+ print(llm_response)
105
+
106
+ # Convert response to audio
107
+ encoded_waveform = ggwave.encode(llm_response, protocolId=1, volume=100)
108
+
109
+ # Convert byte data into float32 array
110
+ waveform_float32 = np.frombuffer(encoded_waveform, dtype=np.float32)
111
+
112
+ # Normalize float32 data to the range of int16
113
+ waveform_int16 = np.int16(waveform_float32 * 32767)
114
+
115
+ # Save to buffer instead of a file
116
+ buffer = io.BytesIO()
117
+ with wave.open(buffer, "wb") as wf:
118
+ wf.setnchannels(1) # Mono audio
119
+ wf.setsampwidth(2) # 2 bytes per sample (16-bit PCM)
120
+ wf.setframerate(48000) # Sample rate
121
+ wf.writeframes(waveform_int16.tobytes()) # Write waveform as bytes
122
+
123
+ buffer.seek(0)
124
+
125
+ return Response(
126
+ content=buffer.getvalue(),
127
+ media_type="audio/wav",
128
+ headers={
129
+ "X-User-Message": user_message.decode("utf-8"),
130
+ "X-LLM-Response": llm_response
131
+ }
132
+ )
133
+
134
+ except Exception as e:
135
+ print(f"Error processing audio: {str(e)}")
136
+ return Response(
137
+ content=f"Error processing audio: {str(e)}",
138
+ media_type="text/plain",
139
+ status_code=500
140
+ )