# (garbled export residue — original lines read "Spaces:" / "Sleeping" / "Sleeping")
from pydub import AudioSegment | |
from pydub.silence import detect_silence | |
import os | |
import json | |
def split_audio_on_silence(input_audio_path, output_dir, silence_thresh=-40, min_silence_len=120, keep_silence=30):
    """
    Split an audio file into clips at detected silences and write a JSON file
    describing each clip's position (in ms) within the original audio.

    :param input_audio_path: Path to the input audio file.
    :param output_dir: Directory to save the output clips (created if missing).
    :param silence_thresh: Silence threshold in dBFS (default: -40 dBFS).
    :param min_silence_len: Minimum silence length to detect, in milliseconds
        (default: 120 ms).
    :param keep_silence: Milliseconds of silence to retain at the beginning and
        end of each clip (default: 30 ms).
    """
    # Robustness: don't rely on the caller having created the directory.
    os.makedirs(output_dir, exist_ok=True)

    audio = AudioSegment.from_file(input_audio_path)

    # (start_ms, end_ms) ranges of every silence at least min_silence_len long.
    silence_ranges = detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)

    # Shrink each silence range *inward* so keep_silence ms of it stays attached
    # to the neighbouring clips.  (The previous code expanded the ranges outward,
    # which trimmed audio away from the clips — the opposite of what the
    # keep_silence parameter documents.)  Silences fully consumed by the
    # adjustment are dropped.
    adjusted_ranges = []
    for sil_start, sil_end in silence_ranges:
        new_start = max(0, sil_start + keep_silence)
        new_end = min(len(audio), sil_end - keep_silence)
        if new_start < new_end:
            adjusted_ranges.append((new_start, new_end))

    clips_metadata = []

    def _save_clip(clip_number, start_ms, end_ms):
        # Export audio[start_ms:end_ms] as clip_<n>.wav and record its metadata.
        file_name = f"clip_{clip_number}.wav"
        output_path = os.path.join(output_dir, file_name)
        audio[start_ms:end_ms].export(output_path, format="wav")
        print(f"Saved clip: {output_path}")
        clips_metadata.append({
            "clip_number": clip_number,
            "start_time_ms": start_ms,
            "end_time_ms": end_ms,
            "file_name": file_name,
        })
        return output_path

    if not adjusted_ranges:
        # No usable silences: the whole recording becomes a single clip.
        output_path = _save_clip(0, 0, len(audio))
        print(f"No silences detected. Entire audio saved as {output_path}")
    else:
        # Sequential clip numbering — no gaps even when a silence starts at 0,
        # which previously skipped a number (the old code numbered clips by the
        # silence index instead of a counter).
        clip_number = 0
        segment_start = 0
        for sil_start, sil_end in adjusted_ranges:
            if sil_start > segment_start:
                _save_clip(clip_number, segment_start, sil_start)
                clip_number += 1
            segment_start = sil_end
        # Audio remaining after the final silence.
        if segment_start < len(audio):
            _save_clip(clip_number, segment_start, len(audio))

    json_path = os.path.join(output_dir, "clips_metadata.json")
    with open(json_path, "w") as json_file:
        json.dump(clips_metadata, json_file, indent=4)
    print(f"Clip metadata saved to {json_path}")
if __name__ == "__main__":
    # Demo entry point: split the sample recording into silence-delimited clips.
    source_file = "male_audio.m4a"
    clips_dir = "audio_clips"
    os.makedirs(clips_dir, exist_ok=True)
    split_audio_on_silence(source_file, clips_dir)