Spaces:
Sleeping
Sleeping
add
Browse files- audio_extractor.py +17 -60
audio_extractor.py
CHANGED
@@ -254,63 +254,33 @@ class SimpleAudioExtractor:
|
|
254 |
except Exception as e:
|
255 |
raise Exception(f"Failed to convert audio to WAV: {str(e)}")
|
256 |
|
257 |
-
def
|
258 |
-
"""Create
|
259 |
total_samples = waveform.size(1)
|
260 |
duration_sec = total_samples / sample_rate
|
261 |
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
if duration_sec <= 5:
|
266 |
-
print(f"📦 Audio is very short ({duration_sec:.2f}s), keeping as single chunk")
|
267 |
return [waveform]
|
268 |
|
269 |
-
# For
|
270 |
-
|
271 |
-
|
272 |
-
print(f"📦 Short audio detected, using {chunk_length_sec:.1f}s chunks")
|
273 |
-
|
274 |
-
# For medium audio (30s-2min), use 30-second chunks
|
275 |
-
elif duration_sec <= 120:
|
276 |
-
chunk_length_sec = 30
|
277 |
-
print(f"📦 Medium audio detected, using {chunk_length_sec}s chunks")
|
278 |
-
|
279 |
-
# For long audio (>2min), use 1-minute chunks
|
280 |
-
else:
|
281 |
-
chunk_length_sec = 60
|
282 |
-
print(f"📦 Long audio detected, using {chunk_length_sec}s chunks")
|
283 |
-
|
284 |
-
chunk_samples = int(chunk_length_sec * sample_rate)
|
285 |
chunks = []
|
286 |
-
min_samples = int(min_chunk_sec * sample_rate) # Minimum chunk size
|
287 |
|
288 |
for start in range(0, total_samples, chunk_samples):
|
289 |
end = min(start + chunk_samples, total_samples)
|
290 |
chunk = waveform[:, start:end]
|
291 |
-
|
292 |
-
|
293 |
-
if chunk.size(1) >= min_samples or (len(chunks) == 0 and start + chunk_samples >= total_samples):
|
294 |
chunks.append(chunk)
|
295 |
-
chunk_dur = chunk.size(1) / sample_rate
|
296 |
-
print(f" β Chunk {len(chunks)}: {chunk_dur:.2f}s")
|
297 |
-
else:
|
298 |
-
# If chunk is too small, merge it with the previous chunk if possible
|
299 |
-
if chunks:
|
300 |
-
print(f" π Merging small chunk ({chunk.size(1) / sample_rate:.2f}s) with previous")
|
301 |
-
chunks[-1] = torch.cat([chunks[-1], chunk], dim=1)
|
302 |
-
merged_dur = chunks[-1].size(1) / sample_rate
|
303 |
-
print(f" β Merged chunk {len(chunks)}: {merged_dur:.2f}s")
|
304 |
-
else:
|
305 |
-
# If no previous chunks, keep it anyway (better than losing audio)
|
306 |
-
chunks.append(chunk)
|
307 |
-
print(f" ⚠️ Keeping small chunk {len(chunks)}: {chunk.size(1) / sample_rate:.2f}s (no other chunks)")
|
308 |
|
309 |
-
print(f"📦 Created {len(chunks)}
|
310 |
return chunks
|
311 |
|
312 |
-
def prepare_audio(video_source,
|
313 |
-
"""Main function to extract and prepare
|
314 |
try:
|
315 |
print(f"🎵 Extracting audio from source...")
|
316 |
extractor = SimpleAudioExtractor()
|
@@ -333,34 +303,21 @@ def prepare_audio(video_source, min_chunk_seconds=2):
|
|
333 |
end = time.time()
|
334 |
print(f"[⏱️] Audio preparation took {end - start:.2f} seconds.")
|
335 |
|
336 |
-
# Calculate duration
|
337 |
duration_minutes = waveform.size(1) / sample_rate / 60
|
338 |
|
339 |
-
print(f"🧩 Creating
|
340 |
start = time.time()
|
341 |
-
chunks =
|
342 |
end = time.time()
|
343 |
print(f"[⏱️] Chunking took {end - start:.2f} seconds. Total chunks: {len(chunks)}")
|
344 |
|
345 |
-
# Log chunk details
|
346 |
-
print("π Chunk Summary:")
|
347 |
-
total_chunk_duration = 0
|
348 |
-
for i, chunk in enumerate(chunks, 1):
|
349 |
-
chunk_duration = chunk.size(1) / sample_rate
|
350 |
-
total_chunk_duration += chunk_duration
|
351 |
-
print(f" Chunk {i}: {chunk_duration:.2f}s")
|
352 |
-
|
353 |
-
print(f" Total chunked duration: {total_chunk_duration:.2f}s")
|
354 |
-
print(f" Original duration: {duration_minutes * 60:.2f}s")
|
355 |
-
|
356 |
return {
|
357 |
"success": True,
|
358 |
"chunks": chunks,
|
359 |
"audio_path": audio_path,
|
360 |
"duration_minutes": duration_minutes,
|
361 |
-
"total_chunks": len(chunks)
|
362 |
-
"chunk_details": [{"chunk_id": i+1, "duration_seconds": chunk.size(1) / sample_rate}
|
363 |
-
for i, chunk in enumerate(chunks)]
|
364 |
}
|
365 |
|
366 |
except Exception as e:
|
|
|
254 |
except Exception as e:
|
255 |
raise Exception(f"Failed to convert audio to WAV: {str(e)}")
|
256 |
|
257 |
+
def chunk_audio_1min(waveform, sample_rate, short_audio_threshold=30):
|
258 |
+
"""Create 1-minute chunks from audio, handle short audio as single chunk"""
|
259 |
total_samples = waveform.size(1)
|
260 |
duration_sec = total_samples / sample_rate
|
261 |
|
262 |
+
# If audio is short (≤30 seconds by default), return as single chunk
|
263 |
+
if duration_sec <= short_audio_threshold:
|
264 |
+
print(f"📦 Short audio ({duration_sec:.2f}s), keeping as single chunk")
|
|
|
|
|
265 |
return [waveform]
|
266 |
|
267 |
+
# For longer audio, use 1-minute chunks
|
268 |
+
chunk_length_sec = 60 # 1 minute chunks
|
269 |
+
chunk_samples = chunk_length_sec * sample_rate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
270 |
chunks = []
|
|
|
271 |
|
272 |
for start in range(0, total_samples, chunk_samples):
|
273 |
end = min(start + chunk_samples, total_samples)
|
274 |
chunk = waveform[:, start:end]
|
275 |
+
# Only include chunks that are at least 10 seconds long
|
276 |
+
if chunk.size(1) > sample_rate * 10:
|
|
|
277 |
chunks.append(chunk)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
278 |
|
279 |
+
print(f"📦 Created {len(chunks)} 1-minute chunks")
|
280 |
return chunks
|
281 |
|
282 |
+
def prepare_audio(video_source, short_audio_threshold=30):
|
283 |
+
"""Main function to extract and prepare audio chunks, handling short audio as single segment"""
|
284 |
try:
|
285 |
print(f"🎵 Extracting audio from source...")
|
286 |
extractor = SimpleAudioExtractor()
|
|
|
303 |
end = time.time()
|
304 |
print(f"[⏱️] Audio preparation took {end - start:.2f} seconds.")
|
305 |
|
306 |
+
# Calculate duration and create chunks
|
307 |
duration_minutes = waveform.size(1) / sample_rate / 60
|
308 |
|
309 |
+
print(f"🧩 Creating chunks (short audio threshold: {short_audio_threshold}s)...")
|
310 |
start = time.time()
|
311 |
+
chunks = chunk_audio_1min(waveform, sample_rate, short_audio_threshold)
|
312 |
end = time.time()
|
313 |
print(f"[⏱️] Chunking took {end - start:.2f} seconds. Total chunks: {len(chunks)}")
|
314 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
315 |
return {
|
316 |
"success": True,
|
317 |
"chunks": chunks,
|
318 |
"audio_path": audio_path,
|
319 |
"duration_minutes": duration_minutes,
|
320 |
+
"total_chunks": len(chunks)
|
|
|
|
|
321 |
}
|
322 |
|
323 |
except Exception as e:
|