# NumberVideo_Bot / process_audio.py
# Author: Eyuvaraj
# HeyGen API video gen, timestamp clipping (commit 41cfa9c)
from pydub import AudioSegment
from pydub.silence import detect_silence
import os
import json
def _write_metadata(output_dir, clips_metadata):
    """Write the clip metadata list to clips_metadata.json inside output_dir."""
    json_path = os.path.join(output_dir, "clips_metadata.json")
    with open(json_path, "w") as json_file:
        json.dump(clips_metadata, json_file, indent=4)
    print(f"Clip metadata saved to {json_path}")
    return json_path


def split_audio_on_silence(input_audio_path, output_dir, silence_thresh=-40, min_silence_len=120, keep_silence=30):
    """
    Split an audio file into clips wherever silence is detected, and write a
    JSON file of clip timestamps alongside the exported WAV clips.

    :param input_audio_path: Path to the input audio file.
    :param output_dir: Directory to save the output clips (created if missing).
    :param silence_thresh: Silence threshold in dBFS (default: -40 dBFS).
    :param min_silence_len: Minimum silence length to detect, in milliseconds
        (default: 120 ms).
    :param keep_silence: Milliseconds of silence to retain at the beginning
        and end of each clip (default: 30 ms).
    """
    os.makedirs(output_dir, exist_ok=True)

    audio = AudioSegment.from_file(input_audio_path)

    # Detect [start_ms, end_ms] silence ranges.
    silence_ranges = detect_silence(
        audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh
    )

    # Shrink each silence range by keep_silence on both sides so each clip
    # retains a short silent lead-in/lead-out.  (Expanding the ranges instead
    # — the previous behavior — trimmed keep_silence ms of *audio* off every
    # clip boundary.)  Clamp so a short silence can never produce an inverted
    # range or spill outside the file.
    adjusted_ranges = []
    for start, end in silence_ranges:
        new_start = min(start + keep_silence, end)
        new_end = max(end - keep_silence, new_start)
        adjusted_ranges.append((max(0, new_start), min(len(audio), new_end)))

    clips_metadata = []

    # No silences detected: export the whole file as a single clip.
    if not adjusted_ranges:
        file_name = "clip_0.wav"
        output_path = os.path.join(output_dir, file_name)
        audio.export(output_path, format="wav")
        clips_metadata.append({
            "clip_number": 0,
            "start_time_ms": 0,
            "end_time_ms": len(audio),
            "file_name": file_name,
        })
        print(f"No silences detected. Entire audio saved as {output_path}")
        _write_metadata(output_dir, clips_metadata)
        return

    # Export the non-silent span preceding each silence.
    start_idx = 0
    for i, (start, end) in enumerate(adjusted_ranges):
        # Skip empty spans (e.g. when the file opens with silence).
        if start > start_idx:
            file_name = f"clip_{i}.wav"
            output_path = os.path.join(output_dir, file_name)
            audio[start_idx:start].export(output_path, format="wav")
            print(f"Saved clip: {output_path}")
            clips_metadata.append({
                "clip_number": i,
                "start_time_ms": start_idx,
                "end_time_ms": start,
                "file_name": file_name,
            })
        start_idx = end

    # Export whatever remains after the final silence.
    if start_idx < len(audio):
        file_name = f"clip_{len(adjusted_ranges)}.wav"
        output_path = os.path.join(output_dir, file_name)
        audio[start_idx:].export(output_path, format="wav")
        print(f"Saved clip: {output_path}")
        clips_metadata.append({
            "clip_number": len(adjusted_ranges),
            "start_time_ms": start_idx,
            "end_time_ms": len(audio),
            "file_name": file_name,
        })

    _write_metadata(output_dir, clips_metadata)
if __name__ == "__main__":
    # Script entry point: split the sample recording into silence-delimited
    # clips under the output directory, creating it first if necessary.
    source_audio = "male_audio.m4a"
    clips_dir = "audio_clips"
    os.makedirs(clips_dir, exist_ok=True)
    split_audio_on_silence(source_audio, clips_dir)