ciyidogan committed on
Commit
041bedd
·
verified ·
1 Parent(s): 8682228

Delete audio_buffer_manager.py

Browse files
Files changed (1) hide show
  1. audio_buffer_manager.py +0 -256
audio_buffer_manager.py DELETED
@@ -1,256 +0,0 @@
1
- """
2
- Audio Buffer Manager for Flare
3
- ==============================
4
- Manages audio buffering, silence detection, and chunk processing
5
- """
6
- import asyncio
7
- from typing import Dict, Optional, List, Tuple, Any
8
- from collections import deque
9
- from datetime import datetime
10
- import base64
11
- import numpy as np
12
- from dataclasses import dataclass
13
- import traceback
14
-
15
- from event_bus import EventBus, Event, EventType
16
- from utils.logger import log_info, log_error, log_debug, log_warning
17
-
18
-
19
@dataclass
class AudioChunk:
    """One piece of buffered audio plus the bookkeeping stored alongside it."""

    # Raw audio payload exactly as it was handed to the buffer.
    data: bytes
    # Time associated with the chunk.
    timestamp: datetime
    # Sequence number of the chunk.
    chunk_index: int
    # Speech flag; True unless the creator says otherwise.
    is_speech: bool = True
    # Energy figure for the chunk; 0.0 unless the creator supplies one.
    energy_level: float = 0.0
27
-
28
-
29
class SilenceDetector:
    """Classify 16-bit audio chunks as silence and time the current silent run.

    A chunk is silent when its RMS energy, normalized by 32768, is below
    ``energy_threshold``. The reported duration is the wall-clock time between
    the first silent chunk of the current run and the latest call; it is not
    derived from the amount of audio received.
    """

    def __init__(self,
                 threshold_ms: int = 2000,
                 energy_threshold: float = 0.01,
                 sample_rate: int = 16000):
        """Store the detector settings and start with no silent run in progress.

        Args:
            threshold_ms: Silence threshold in milliseconds. Stored for
                callers; this class does not compare against it.
            energy_threshold: Normalized RMS level below which a chunk is
                treated as silence.
            sample_rate: Sample rate in Hz. Stored for callers; not used in
                the energy or duration computation here.
        """
        self.threshold_ms = threshold_ms
        self.energy_threshold = energy_threshold
        self.sample_rate = sample_rate
        # Start time of the current silent run, or None while not in silence.
        self.silence_start: Optional[datetime] = None

    def detect_silence(self, audio_chunk: bytes) -> Tuple[bool, int]:
        """Decide whether a chunk is silence and report the silent run length.

        Args:
            audio_chunk: Raw bytes interpreted as native-endian int16 samples.
                NOTE(review): presumably little-endian PCM16 from the client;
                confirm before running on a big-endian host.

        Returns:
            ``(is_silence, silence_duration_ms)``. Empty or sub-sample input
            yields ``(True, 0)`` without touching the silence timer. Any
            processing error is logged and yields ``(False, 0)``.
        """
        try:
            # Nothing to measure: treat as silence but leave the timer alone.
            if not audio_chunk or len(audio_chunk) < 2:
                return True, 0

            # int16 samples need an even byte count; drop a trailing odd byte.
            if len(audio_chunk) % 2 != 0:
                audio_chunk = audio_chunk[:-1]

            samples = np.frombuffer(audio_chunk, dtype=np.int16)

            # Defensive guard for inputs that decode to no samples.
            if len(samples) == 0:
                return True, 0

            # Widen to float before squaring so int16 values cannot overflow.
            rms = np.sqrt(np.mean(samples.astype(float) ** 2))
            normalized_rms = rms / 32768.0  # Normalize for 16-bit audio

            # Comparing a NumPy scalar yields numpy.bool_; coerce so the
            # result honours the annotated ``bool`` (identity checks, JSON).
            is_silence = bool(normalized_rms < self.energy_threshold)

            if not is_silence:
                # Sound ends the current silent run.
                self.silence_start = None
                return is_silence, 0

            now = datetime.utcnow()
            if self.silence_start is None:
                self.silence_start = now
            duration_ms = int((now - self.silence_start).total_seconds() * 1000)
            return is_silence, duration_ms

        except Exception as e:
            # Best effort: a bad chunk must not break the audio pipeline.
            log_warning(f"Silence detection error: {e}")
            return False, 0

    def reset(self):
        """Forget the current silent run so the next silent chunk starts at 0 ms."""
        self.silence_start = None
86
-
87
-
88
class AudioBuffer:
    """Bounded store of audio chunks for one session, guarded by an asyncio lock.

    Chunks live in a ``deque`` capped at ``max_chunks``, so the oldest chunk is
    dropped when a new one arrives at capacity. ``chunk_counter`` and
    ``total_bytes`` only grow on insert and are zeroed by ``clear``; they are
    not reduced when the deque drops an old chunk.
    """

    def __init__(self,
                 session_id: str,
                 max_chunks: int = 1000,
                 chunk_size_bytes: int = 4096):
        """Create an empty buffer.

        Args:
            session_id: Identifier of the session this buffer belongs to.
            max_chunks: Maximum number of chunks kept at once.
            chunk_size_bytes: Stored on the instance; not otherwise used by
                this class.
        """
        self.session_id = session_id
        self.max_chunks = max_chunks
        self.chunk_size_bytes = chunk_size_bytes
        self.chunks: deque[AudioChunk] = deque(maxlen=max_chunks)
        self.chunk_counter = 0
        self.total_bytes = 0
        self.lock = asyncio.Lock()

    async def add_chunk(self, audio_data: bytes, timestamp: Optional[datetime] = None) -> AudioChunk:
        """Append audio to the buffer and return the chunk record created for it.

        Args:
            audio_data: Raw audio bytes to store.
            timestamp: Time to record for the chunk; the current naive UTC
                time is used when omitted.
        """
        async with self.lock:
            stamped_at = timestamp
            if stamped_at is None:
                stamped_at = datetime.utcnow()

            new_chunk = AudioChunk(
                data=audio_data,
                timestamp=stamped_at,
                chunk_index=self.chunk_counter
            )
            self.chunks.append(new_chunk)

            self.chunk_counter += 1
            self.total_bytes += len(audio_data)

            return new_chunk

    async def get_recent_audio(self, duration_ms: int = 5000) -> bytes:
        """Return the concatenated audio of the newest chunks, oldest first.

        Walks from the newest chunk backwards and stops at the first chunk
        whose timestamp is more than ``duration_ms`` before now.
        """
        async with self.lock:
            now = datetime.utcnow()
            newest_first = []

            for entry in reversed(self.chunks):
                age_ms = (now - entry.timestamp).total_seconds() * 1000
                if age_ms > duration_ms:
                    break
                newest_first.append(entry.data)

            # Collected newest-first; flip back to chronological order.
            return b''.join(reversed(newest_first))

    async def clear(self):
        """Drop every chunk and zero the counters."""
        async with self.lock:
            self.chunks.clear()
            self.chunk_counter = 0
            self.total_bytes = 0

    def get_stats(self) -> Dict[str, Any]:
        """Return a snapshot of chunk count, byte total, counter and time range."""
        oldest = None
        newest = None
        if self.chunks:
            oldest = self.chunks[0].timestamp
            newest = self.chunks[-1].timestamp

        return {
            "chunks": len(self.chunks),
            "total_bytes": self.total_bytes,
            "chunk_counter": self.chunk_counter,
            "oldest_chunk": oldest,
            "newest_chunk": newest
        }
154
-
155
-
156
class AudioBufferManager:
    """Keep one AudioBuffer per session, driven by event-bus notifications.

    Subscribes to session start/end and audio-chunk events; incoming chunks
    are base64-decoded and appended to the buffer of their session.
    """

    def __init__(self, event_bus: EventBus):
        """Store the bus, create the empty registries and subscribe handlers.

        Args:
            event_bus: Bus used to subscribe to session and audio events.
        """
        self.event_bus = event_bus
        self.session_buffers: Dict[str, AudioBuffer] = {}
        # NOTE(review): no method of this class adds entries here; it is only
        # read and cleaned up. Confirm whether another component fills it.
        self.silence_detectors: Dict[str, SilenceDetector] = {}
        self._setup_event_handlers()

    def _setup_event_handlers(self):
        """Subscribe to audio events"""
        self.event_bus.subscribe(EventType.SESSION_STARTED, self._handle_session_started)
        self.event_bus.subscribe(EventType.SESSION_ENDED, self._handle_session_ended)
        self.event_bus.subscribe(EventType.AUDIO_CHUNK_RECEIVED, self._handle_audio_chunk)

    async def _handle_session_started(self, event: Event):
        """Create the buffer for a new session from the event's optional config.

        Reads ``max_chunks`` and ``chunk_size`` from ``event.data`` and falls
        back to 1000 and 4096 when they are absent.
        """
        session_id = event.session_id
        # An event published without a payload should still get a buffer.
        config = event.data or {}

        self.session_buffers[session_id] = AudioBuffer(
            session_id=session_id,
            max_chunks=config.get("max_chunks", 1000),
            chunk_size_bytes=config.get("chunk_size", 4096)
        )

        log_info("📦 Audio buffer initialized", session_id=session_id)

    async def _handle_session_ended(self, event: Event):
        """Release the buffer and silence detector of a finished session."""
        session_id = event.session_id

        # Unregister before awaiting so overlapping end events for the same
        # session cannot both reach the removal and raise KeyError.
        buffer = self.session_buffers.pop(session_id, None)
        if buffer is not None:
            await buffer.clear()

        self.silence_detectors.pop(session_id, None)

        log_info("📦 Audio buffer cleaned up", session_id=session_id)

    async def _handle_audio_chunk(self, event: Event):
        """Decode a base64 audio payload and append it to the session buffer.

        Events for unknown sessions and events whose payload decodes to no
        bytes are logged and ignored. Any other failure is logged with its
        traceback and swallowed so one bad event cannot break the handler.
        """
        session_id = event.session_id

        buffer = self.session_buffers.get(session_id)

        if buffer is None:
            log_warning("⚠️ No buffer for session", session_id=session_id)
            return

        try:
            payload = (event.data or {}).get("audio_data", "")
            audio_data = base64.b64decode(payload)

            # A missing or empty payload carries no audio; storing it would
            # only consume a buffer slot and a chunk index.
            if not audio_data:
                log_warning("⚠️ Empty audio chunk ignored", session_id=session_id)
                return

            chunk = await buffer.add_chunk(audio_data)

            # Emit buffer statistics once every 100 chunks.
            if chunk.chunk_index % 100 == 0:
                log_debug(
                    "📊 Buffer stats",
                    session_id=session_id,
                    **buffer.get_stats()
                )

        except Exception as e:
            log_error(
                "❌ Error processing audio chunk",
                session_id=session_id,
                error=str(e),
                traceback=traceback.format_exc()
            )

    async def get_buffer(self, session_id: str) -> Optional[AudioBuffer]:
        """Return the buffer registered for ``session_id``, or None."""
        return self.session_buffers.get(session_id)

    async def reset_buffer(self, session_id: str):
        """Clear the session's buffer and reset its silence detector, if any."""
        buffer = self.session_buffers.get(session_id)
        detector = self.silence_detectors.get(session_id)

        if buffer is not None:
            await buffer.clear()

        if detector is not None:
            detector.reset()

        log_debug("🔄 Audio buffer reset", session_id=session_id)

    def get_all_stats(self) -> Dict[str, Dict[str, Any]]:
        """Return ``get_stats()`` of every registered buffer keyed by session id."""
        return {
            session_id: buffer.get_stats()
            for session_id, buffer in self.session_buffers.items()
        }