Amr-h committed on
Commit
5ac9990
·
1 Parent(s): a7a25f4

add smaller chuncks

Browse files
Files changed (1) hide show
  1. audio_extractor.py +64 -13
audio_extractor.py CHANGED
@@ -254,25 +254,63 @@ class SimpleAudioExtractor:
254
  except Exception as e:
255
  raise Exception(f"Failed to convert audio to WAV: {str(e)}")
256
 
257
- def chunk_audio_1min(waveform, sample_rate):
258
- """Create 1-minute chunks from audio"""
259
- chunk_length_sec = 60 # 1 minute chunks
260
- chunk_samples = chunk_length_sec * sample_rate
261
  total_samples = waveform.size(1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
262
  chunks = []
 
263
 
264
  for start in range(0, total_samples, chunk_samples):
265
  end = min(start + chunk_samples, total_samples)
266
  chunk = waveform[:, start:end]
267
- # Only include chunks that are at least 10 seconds long
268
- if chunk.size(1) > sample_rate * 10:
 
269
  chunks.append(chunk)
 
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
- print(f"📦 Created {len(chunks)} 1-minute chunks")
272
  return chunks
273
 
274
- def prepare_audio(video_source):
275
- """Main function to extract and prepare 1-minute audio chunks"""
276
  try:
277
  print(f"🎵 Extracting audio from source...")
278
  extractor = SimpleAudioExtractor()
@@ -295,21 +333,34 @@ def prepare_audio(video_source):
295
  end = time.time()
296
  print(f"[⏱️] Audio preparation took {end - start:.2f} seconds.")
297
 
298
- # Calculate duration and create 1-minute chunks
299
  duration_minutes = waveform.size(1) / sample_rate / 60
300
 
301
- print(f"🧩 Creating 1-minute chunks...")
302
  start = time.time()
303
- chunks = chunk_audio_1min(waveform, sample_rate)
304
  end = time.time()
305
  print(f"[⏱️] Chunking took {end - start:.2f} seconds. Total chunks: {len(chunks)}")
306
 
 
 
 
 
 
 
 
 
 
 
 
307
  return {
308
  "success": True,
309
  "chunks": chunks,
310
  "audio_path": audio_path,
311
  "duration_minutes": duration_minutes,
312
- "total_chunks": len(chunks)
 
 
313
  }
314
 
315
  except Exception as e:
 
254
  except Exception as e:
255
  raise Exception(f"Failed to convert audio to WAV: {str(e)}")
256
 
257
+ def chunk_audio_adaptive(waveform, sample_rate, min_chunk_sec=2):
258
+ """Create adaptive chunks from audio, handling small voices and short audio"""
 
 
259
  total_samples = waveform.size(1)
260
+ duration_sec = total_samples / sample_rate
261
+
262
+ print(f"🎵 Audio duration: {duration_sec:.2f} seconds ({duration_sec/60:.2f} minutes)")
263
+
264
+ # For very short audio (less than 5 seconds), return as single chunk
265
+ if duration_sec <= 5:
266
+ print(f"📦 Audio is very short ({duration_sec:.2f}s), keeping as single chunk")
267
+ return [waveform]
268
+
269
+ # For short audio (5-30 seconds), create smaller chunks
270
+ elif duration_sec <= 30:
271
+ chunk_length_sec = max(min_chunk_sec, duration_sec / 3) # Split into ~3 chunks
272
+ print(f"📦 Short audio detected, using {chunk_length_sec:.1f}s chunks")
273
+
274
+ # For medium audio (30s-2min), use 30-second chunks
275
+ elif duration_sec <= 120:
276
+ chunk_length_sec = 30
277
+ print(f"📦 Medium audio detected, using {chunk_length_sec}s chunks")
278
+
279
+ # For long audio (>2min), use 1-minute chunks
280
+ else:
281
+ chunk_length_sec = 60
282
+ print(f"📦 Long audio detected, using {chunk_length_sec}s chunks")
283
+
284
+ chunk_samples = int(chunk_length_sec * sample_rate)
285
  chunks = []
286
+ min_samples = int(min_chunk_sec * sample_rate) # Minimum chunk size
287
 
288
  for start in range(0, total_samples, chunk_samples):
289
  end = min(start + chunk_samples, total_samples)
290
  chunk = waveform[:, start:end]
291
+
292
+ # Include chunk if it meets minimum size OR if it's the last chunk and we have no chunks yet
293
+ if chunk.size(1) >= min_samples or (len(chunks) == 0 and start + chunk_samples >= total_samples):
294
  chunks.append(chunk)
295
+ chunk_dur = chunk.size(1) / sample_rate
296
+ print(f" ✓ Chunk {len(chunks)}: {chunk_dur:.2f}s")
297
+ else:
298
+ # If chunk is too small, merge it with the previous chunk if possible
299
+ if chunks:
300
+ print(f" 📎 Merging small chunk ({chunk.size(1) / sample_rate:.2f}s) with previous")
301
+ chunks[-1] = torch.cat([chunks[-1], chunk], dim=1)
302
+ merged_dur = chunks[-1].size(1) / sample_rate
303
+ print(f" ✓ Merged chunk {len(chunks)}: {merged_dur:.2f}s")
304
+ else:
305
+ # If no previous chunks, keep it anyway (better than losing audio)
306
+ chunks.append(chunk)
307
+ print(f" ⚠️ Keeping small chunk {len(chunks)}: {chunk.size(1) / sample_rate:.2f}s (no other chunks)")
308
 
309
+ print(f"📦 Created {len(chunks)} adaptive chunks")
310
  return chunks
311
 
312
+ def prepare_audio(video_source, min_chunk_seconds=2):
313
+ """Main function to extract and prepare adaptive audio chunks for small voices"""
314
  try:
315
  print(f"🎵 Extracting audio from source...")
316
  extractor = SimpleAudioExtractor()
 
333
  end = time.time()
334
  print(f"[⏱️] Audio preparation took {end - start:.2f} seconds.")
335
 
336
+ # Calculate duration
337
  duration_minutes = waveform.size(1) / sample_rate / 60
338
 
339
+ print(f"🧩 Creating adaptive chunks (min {min_chunk_seconds}s)...")
340
  start = time.time()
341
+ chunks = chunk_audio_adaptive(waveform, sample_rate, min_chunk_seconds)
342
  end = time.time()
343
  print(f"[⏱️] Chunking took {end - start:.2f} seconds. Total chunks: {len(chunks)}")
344
 
345
+ # Log chunk details
346
+ print("📋 Chunk Summary:")
347
+ total_chunk_duration = 0
348
+ for i, chunk in enumerate(chunks, 1):
349
+ chunk_duration = chunk.size(1) / sample_rate
350
+ total_chunk_duration += chunk_duration
351
+ print(f" Chunk {i}: {chunk_duration:.2f}s")
352
+
353
+ print(f" Total chunked duration: {total_chunk_duration:.2f}s")
354
+ print(f" Original duration: {duration_minutes * 60:.2f}s")
355
+
356
  return {
357
  "success": True,
358
  "chunks": chunks,
359
  "audio_path": audio_path,
360
  "duration_minutes": duration_minutes,
361
+ "total_chunks": len(chunks),
362
+ "chunk_details": [{"chunk_id": i+1, "duration_seconds": chunk.size(1) / sample_rate}
363
+ for i, chunk in enumerate(chunks)]
364
  }
365
 
366
  except Exception as e: