import json
import os

from pydub import AudioSegment
from pydub.silence import detect_silence


def split_audio_on_silence(input_audio_path, output_dir, silence_thresh=-40,
                           min_silence_len=120, keep_silence=30):
    """
    Split an audio file into clips wherever silence is detected and write a
    JSON manifest (``clips_metadata.json``) with each clip's timestamps.

    :param input_audio_path: Path to the input audio file (any format pydub/ffmpeg can read).
    :param output_dir: Directory to save the output WAV clips and the metadata JSON.
    :param silence_thresh: Silence threshold in dBFS (default: -40 dBFS).
    :param min_silence_len: Minimum silence length to detect, in ms (default: 120 ms).
    :param keep_silence: Milliseconds of silence to retain at the beginning and
        end of each clip (default: 30 ms).
    """
    # Ensure the destination exists even when called directly as a library function.
    os.makedirs(output_dir, exist_ok=True)

    audio = AudioSegment.from_file(input_audio_path)

    silence_ranges = detect_silence(
        audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh
    )

    # Shrink each silence range inward by keep_silence so every clip RETAINS
    # a little silence at its edges.  (Expanding the ranges instead — a common
    # mistake — would trim keep_silence ms of real audio off each clip.)
    # The padding is capped at half the silence length so start never crosses end.
    adjusted_ranges = []
    for start, end in silence_ranges:
        pad = min(keep_silence, (end - start) // 2)
        adjusted_ranges.append((start + pad, end - pad))

    clips_metadata = []

    def _export_clip(clip_number, seg_start, seg_end):
        # Write audio[seg_start:seg_end] to disk and record its metadata entry.
        file_name = f"clip_{clip_number}.wav"
        output_path = os.path.join(output_dir, file_name)
        audio[seg_start:seg_end].export(output_path, format="wav")
        print(f"Saved clip: {output_path}")
        clips_metadata.append({
            "clip_number": clip_number,
            "start_time_ms": seg_start,
            "end_time_ms": seg_end,
            "file_name": file_name,
        })

    if not adjusted_ranges:
        # No silences detected: keep the entire audio as a single clip.
        file_name = "clip_0.wav"
        output_path = os.path.join(output_dir, file_name)
        audio.export(output_path, format="wav")
        clips_metadata.append({
            "clip_number": 0,
            "start_time_ms": 0,
            "end_time_ms": len(audio),
            "file_name": file_name,
        })
        print(f"No silences detected. Entire audio saved as {output_path}")
    else:
        # Emit the audio between consecutive silences; clip numbers are
        # sequential even when the audio begins with silence.
        clip_number = 0
        start_idx = 0
        for start, end in adjusted_ranges:
            if start > start_idx:
                _export_clip(clip_number, start_idx, start)
                clip_number += 1
            # max() guards against any overlap between adjacent ranges.
            start_idx = max(start_idx, end)

        # Trailing segment after the final silence, if any audio remains.
        if start_idx < len(audio):
            _export_clip(clip_number, start_idx, len(audio))

    # Single metadata write for both branches (no duplicated dump logic).
    json_path = os.path.join(output_dir, "clips_metadata.json")
    with open(json_path, "w") as json_file:
        json.dump(clips_metadata, json_file, indent=4)
    print(f"Clip metadata saved to {json_path}")


if __name__ == "__main__":
    input_audio = "male_audio.m4a"
    output_directory = "audio_clips"
    os.makedirs(output_directory, exist_ok=True)
    split_audio_on_silence(input_audio, output_directory)