Update audio_buffer_manager.py

audio_buffer_manager.py  CHANGED  (+1 -48)

@@ -180,13 +180,6 @@ class AudioBufferManager:
             chunk_size_bytes=config.get("chunk_size", 4096)
         )
 
-        # Create silence detector
-        self.silence_detectors[session_id] = SilenceDetector(
-            threshold_ms=config.get("silence_threshold_ms", 2000),
-            energy_threshold=config.get("energy_threshold", 0.01),
-            sample_rate=config.get("sample_rate", 16000)
-        )
-
         log_info(f"📦 Audio buffer initialized", session_id=session_id)
 
     async def _handle_session_ended(self, event: Event):
@@ -209,9 +202,8 @@
         session_id = event.session_id
 
         buffer = self.session_buffers.get(session_id)
-        detector = self.silence_detectors.get(session_id)
 
-        if not buffer
+        if not buffer:
             log_warning(f"⚠️ No buffer for session", session_id=session_id)
             return
 
@@ -222,45 +214,6 @@
         # Add to buffer
         chunk = await buffer.add_chunk(audio_data)
 
-        # Detect silence
-        is_silence, silence_duration = detector.detect_silence(audio_data)
-
-        # Update chunk metadata
-        chunk.is_speech = not is_silence
-        chunk.energy_level = 1.0 - (silence_duration / detector.threshold_ms)
-
-        # Log every 50 chunks for debugging
-        if chunk.chunk_index % 50 == 0:
-            log_info(
-                f"🎤 Audio processing - Chunk: {chunk.chunk_index}, Silence: {is_silence}, Duration: {silence_duration}ms",
-                session_id=session_id
-            )
-
-        # Check for end of speech
-        if silence_duration > detector.threshold_ms:
-            log_info(
-                f"🔇 Speech ended (silence: {silence_duration}ms)",
-                session_id=session_id
-            )
-
-            # Get complete audio
-            complete_audio = await buffer.get_recent_audio()
-
-            # Publish speech ended event
-            await self.event_bus.publish(Event(
-                type=EventType.STT_RESULT,
-                session_id=session_id,
-                data={
-                    "audio_data": base64.b64encode(complete_audio).decode(),
-                    "is_final": True,
-                    "silence_triggered": True
-                },
-                priority=5
-            ))
-
-            # Reset for next utterance
-            await self.reset_buffer(session_id)
-
         # Log periodically
         if chunk.chunk_index % 100 == 0:
             stats = buffer.get_stats()
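
For orientation, a minimal sketch of how the chunk-handling path reads when only the context lines above remain. It is assembled from the diff context only; the handler name _handle_audio_chunk, the event.data["audio_data"] access, and the final stats log call are assumptions, not part of the commit.

    async def _handle_audio_chunk(self, event: Event):
        # Hypothetical handler name; the diff does not show the enclosing signature.
        session_id = event.session_id

        buffer = self.session_buffers.get(session_id)

        if not buffer:
            log_warning(f"⚠️ No buffer for session", session_id=session_id)
            return

        # Assumed: raw audio bytes arrive on the event payload.
        audio_data = event.data["audio_data"]

        # Add to buffer
        chunk = await buffer.add_chunk(audio_data)

        # Log periodically
        if chunk.chunk_index % 100 == 0:
            stats = buffer.get_stats()
            # Assumed logging call; the diff cuts off right after get_stats().
            log_info(f"📊 Buffer stats: {stats}", session_id=session_id)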