Spaces:
Running
Running
Delete audio_buffer_manager.py
Browse files- audio_buffer_manager.py +0 -256
audio_buffer_manager.py
DELETED
@@ -1,256 +0,0 @@
|
|
1 |
-
"""
|
2 |
-
Audio Buffer Manager for Flare
|
3 |
-
==============================
|
4 |
-
Manages audio buffering, silence detection, and chunk processing
|
5 |
-
"""
|
6 |
-
import asyncio
|
7 |
-
from typing import Dict, Optional, List, Tuple, Any
|
8 |
-
from collections import deque
|
9 |
-
from datetime import datetime
|
10 |
-
import base64
|
11 |
-
import numpy as np
|
12 |
-
from dataclasses import dataclass
|
13 |
-
import traceback
|
14 |
-
|
15 |
-
from event_bus import EventBus, Event, EventType
|
16 |
-
from utils.logger import log_info, log_error, log_debug, log_warning
|
17 |
-
|
18 |
-
|
19 |
-
@dataclass
class AudioChunk:
    """One piece of buffered audio plus the bookkeeping stored alongside it."""

    # Raw audio payload exactly as it was handed to the buffer.
    data: bytes
    # Time associated with the chunk (AudioBuffer.add_chunk falls back to
    # the current UTC time when the caller does not supply one).
    timestamp: datetime
    # Sequence number taken from the owning buffer's running counter.
    chunk_index: int
    # Speech flag; defaults to True.
    is_speech: bool = True
    # Energy value for the chunk; defaults to 0.0.
    energy_level: float = 0.0
|
27 |
-
|
28 |
-
|
29 |
-
class SilenceDetector:
    """Energy-based silence tracker for a stream of audio chunks.

    Each chunk is read as 16-bit integer samples and its normalised RMS
    level is compared against ``energy_threshold``.  The length of the
    current silent stretch is measured from the wall-clock time of the
    calls (``datetime.utcnow()``), not from the amount of audio received.
    ``threshold_ms`` and ``sample_rate`` are stored on the instance but are
    not read by any method of this class.
    """

    def __init__(self,
                 threshold_ms: int = 2000,
                 energy_threshold: float = 0.01,
                 sample_rate: int = 16000):
        self.threshold_ms = threshold_ms
        self.energy_threshold = energy_threshold
        self.sample_rate = sample_rate
        # Moment the current silent stretch began; None while not in silence.
        self.silence_start: Optional[datetime] = None

    def detect_silence(self, audio_chunk: bytes) -> Tuple[bool, int]:
        """Classify one chunk and report how long the silence has lasted.

        Args:
            audio_chunk: Raw bytes, interpreted as int16 samples.

        Returns:
            ``(is_silence, silence_duration_ms)``.  A missing or sub-sample
            chunk yields ``(True, 0)`` without touching the tracking state;
            any exception during analysis is logged and yields ``(False, 0)``.
        """
        try:
            # Nothing usable: fewer than one whole 16-bit sample.
            if not audio_chunk or len(audio_chunk) < 2:
                return True, 0

            # Drop a trailing odd byte so the buffer holds whole samples only.
            whole_bytes = len(audio_chunk) - (len(audio_chunk) % 2)
            samples = np.frombuffer(audio_chunk[:whole_bytes], dtype=np.int16)
            if samples.size == 0:
                return True, 0

            # RMS level scaled into [0, 1] by the 16-bit full-scale value.
            rms = np.sqrt(np.mean(np.square(samples.astype(float))))
            level = rms / 32768.0
            quiet = level < self.energy_threshold

            current_time = datetime.utcnow()
            if not quiet:
                # Sound ends any running silent stretch.
                self.silence_start = None
                return quiet, 0

            if self.silence_start is None:
                self.silence_start = current_time
            elapsed = current_time - self.silence_start
            return quiet, int(elapsed.total_seconds() * 1000)

        except Exception as exc:
            log_warning(f"Silence detection error: {exc}")
            return False, 0

    def reset(self):
        """Forget any silent stretch currently being timed."""
        self.silence_start = None
|
86 |
-
|
87 |
-
|
88 |
-
class AudioBuffer:
    """Bounded, lock-protected store of audio chunks for one session.

    Chunks live in a ``deque`` capped at ``max_chunks``, so the oldest entry
    is discarded once the cap is reached.  ``chunk_counter`` and
    ``total_bytes`` are running totals that keep growing after old chunks
    are evicted; only ``clear`` resets them.
    """

    def __init__(self,
                 session_id: str,
                 max_chunks: int = 1000,
                 chunk_size_bytes: int = 4096):
        self.session_id = session_id
        self.max_chunks = max_chunks
        # Stored for reference only; no method of this class reads it.
        self.chunk_size_bytes = chunk_size_bytes
        self.chunks: deque[AudioChunk] = deque(maxlen=max_chunks)
        self.chunk_counter = 0
        self.total_bytes = 0
        self.lock = asyncio.Lock()

    async def add_chunk(self, audio_data: bytes, timestamp: Optional[datetime] = None) -> AudioChunk:
        """Append ``audio_data`` as a new chunk and return the stored record.

        When ``timestamp`` is omitted the current UTC time is used.
        """
        async with self.lock:
            stamped_at = datetime.utcnow() if timestamp is None else timestamp
            entry = AudioChunk(
                data=audio_data,
                timestamp=stamped_at,
                chunk_index=self.chunk_counter,
            )
            self.chunks.append(entry)
            self.chunk_counter += 1
            self.total_bytes += len(audio_data)
            return entry

    async def get_recent_audio(self, duration_ms: int = 5000) -> bytes:
        """Return the concatenated audio of the newest chunks.

        Walks from the newest chunk towards the oldest and stops at the
        first chunk older than ``duration_ms``; the result is in
        chronological order.
        """
        async with self.lock:
            reference = datetime.utcnow()
            # Pushing on the left while walking newest-to-oldest leaves the
            # pieces in oldest-to-newest order.
            pieces = deque()
            for entry in reversed(self.chunks):
                age_ms = (reference - entry.timestamp).total_seconds() * 1000
                if age_ms > duration_ms:
                    break
                pieces.appendleft(entry.data)
            return b''.join(pieces)

    async def clear(self):
        """Drop every chunk and zero the running totals."""
        async with self.lock:
            self.chunks.clear()
            self.total_bytes = 0
            self.chunk_counter = 0

    def get_stats(self) -> Dict[str, Any]:
        """Return a snapshot of buffer size, totals and timestamp range."""
        first_seen = last_seen = None
        if self.chunks:
            first_seen = self.chunks[0].timestamp
            last_seen = self.chunks[-1].timestamp
        return {
            "chunks": len(self.chunks),
            "total_bytes": self.total_bytes,
            "chunk_counter": self.chunk_counter,
            "oldest_chunk": first_seen,
            "newest_chunk": last_seen,
        }
|
154 |
-
|
155 |
-
|
156 |
-
class AudioBufferManager:
    """Own one ``AudioBuffer`` per session and keep it in sync with bus events.

    The manager subscribes to SESSION_STARTED, SESSION_ENDED and
    AUDIO_CHUNK_RECEIVED on the supplied event bus.  Events are expected to
    expose ``session_id`` and ``data`` attributes.

    NOTE(review): nothing in this class ever adds an entry to
    ``silence_detectors``; it is only read and cleaned up here.  Confirm
    whether another component is meant to populate it.
    """

    def __init__(self, event_bus: EventBus):
        self.event_bus = event_bus
        self.session_buffers: Dict[str, AudioBuffer] = {}
        self.silence_detectors: Dict[str, SilenceDetector] = {}
        self._setup_event_handlers()

    def _setup_event_handlers(self):
        """Register this manager's handlers on the event bus."""
        self.event_bus.subscribe(EventType.SESSION_STARTED, self._handle_session_started)
        self.event_bus.subscribe(EventType.SESSION_ENDED, self._handle_session_ended)
        self.event_bus.subscribe(EventType.AUDIO_CHUNK_RECEIVED, self._handle_audio_chunk)

    @staticmethod
    def _buffer_settings(config: Optional[Dict[str, Any]]) -> Tuple[int, int]:
        """Return ``(max_chunks, chunk_size)`` from a session-start payload.

        A missing (``None``) or empty payload yields the defaults
        ``(1000, 4096)`` instead of raising.
        """
        settings = config or {}
        return settings.get("max_chunks", 1000), settings.get("chunk_size", 4096)

    async def _handle_session_started(self, event: Event):
        """Create (or replace) the buffer for the session that just started."""
        session_id = event.session_id

        # Tolerate events published without a data payload: previously a
        # None payload raised AttributeError here, no buffer was created and
        # every later chunk for the session was dropped.
        max_chunks, chunk_size = self._buffer_settings(event.data)

        self.session_buffers[session_id] = AudioBuffer(
            session_id=session_id,
            max_chunks=max_chunks,
            chunk_size_bytes=chunk_size
        )

        log_info("📦 Audio buffer initialized", session_id=session_id)

    async def _handle_session_ended(self, event: Event):
        """Release the buffer and silence detector of a finished session."""
        session_id = event.session_id

        buffer = self.session_buffers.get(session_id)
        if buffer is not None:
            await buffer.clear()
            # pop() rather than del: the entry may already be gone by the
            # time the awaited clear() returns.
            self.session_buffers.pop(session_id, None)

        self.silence_detectors.pop(session_id, None)

        log_info("📦 Audio buffer cleaned up", session_id=session_id)

    async def _handle_audio_chunk(self, event: Event):
        """Decode a base64 audio payload and append it to the session buffer.

        Chunks for sessions without a buffer are dropped with a warning.
        Any failure while decoding or storing is logged, not raised.
        """
        session_id = event.session_id

        buffer = self.session_buffers.get(session_id)
        if buffer is None:
            log_warning("⚠️ No buffer for session", session_id=session_id)
            return

        try:
            # The payload arrives base64-encoded under "audio_data".
            audio_data = base64.b64decode(event.data.get("audio_data", ""))

            chunk = await buffer.add_chunk(audio_data)

            # Emit stats on every 100th chunk (including the first) to keep
            # the debug log readable.
            if chunk.chunk_index % 100 == 0:
                stats = buffer.get_stats()
                log_debug(
                    "📊 Buffer stats",
                    session_id=session_id,
                    **stats
                )

        except Exception as e:
            # Boundary handler: one bad chunk must not take down the bus.
            log_error(
                "❌ Error processing audio chunk",
                session_id=session_id,
                error=str(e),
                traceback=traceback.format_exc()
            )

    async def get_buffer(self, session_id: str) -> Optional[AudioBuffer]:
        """Return the buffer for ``session_id``, or ``None`` if there is none."""
        return self.session_buffers.get(session_id)

    async def reset_buffer(self, session_id: str):
        """Empty the session's buffer and reset its silence detector, if any."""
        buffer = self.session_buffers.get(session_id)
        detector = self.silence_detectors.get(session_id)

        if buffer is not None:
            await buffer.clear()

        if detector is not None:
            detector.reset()

        log_debug("🔄 Audio buffer reset", session_id=session_id)

    def get_all_stats(self) -> Dict[str, Dict[str, Any]]:
        """Return ``get_stats()`` of every active buffer, keyed by session id."""
        return {
            session_id: buffer.get_stats()
            for session_id, buffer in self.session_buffers.items()
        }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|