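# Page header captured along with this file (not part of the program);
# kept as comments so the module parses as Python.
# Spaces: yuvabe-ai / NumberVideo_Bot (status: Sleeping)
# File size: 3,634 Bytes
# Revision: 41cfa9c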

from pydub import AudioSegment
from pydub.silence import detect_silence
import os
import json

def _trim_silence_ranges(silence_ranges, keep_silence, audio_len):
    """Shrink each silence range so ``keep_silence`` ms stays attached to the neighbouring clips."""
    trimmed = []
    for start, end in silence_ranges:
        # Silence touching the start/end of the audio has no clip on that side,
        # so that edge is left alone (otherwise a silence-only clip would appear).
        cut_start = start if start <= 0 else start + keep_silence
        cut_end = end if end >= audio_len else end - keep_silence
        if cut_start > cut_end:
            # The silence is shorter than the requested padding: split it at its
            # midpoint so both neighbours share it and the range never inverts.
            cut_start = cut_end = (start + end) // 2
        trimmed.append((max(0, cut_start), min(audio_len, cut_end)))
    return trimmed


def _export_clip(clip, clip_number, start_ms, end_ms, output_dir):
    """Write ``clip`` as ``clip_<clip_number>.wav`` and return ``(output_path, metadata)``."""
    file_name = f"clip_{clip_number}.wav"
    output_path = os.path.join(output_dir, file_name)
    # export() hands back the open output file; close it instead of relying on GC.
    clip.export(output_path, format="wav").close()
    metadata = {
        "clip_number": clip_number,
        "start_time_ms": start_ms,
        "end_time_ms": end_ms,
        "file_name": file_name,
    }
    return output_path, metadata


def _write_clips_metadata(clips_metadata, output_dir):
    """Dump the clip metadata list to ``clips_metadata.json`` and return its path."""
    json_path = os.path.join(output_dir, "clips_metadata.json")
    with open(json_path, "w", encoding="utf-8") as json_file:
        json.dump(clips_metadata, json_file, indent=4)
    return json_path


def split_audio_on_silence(input_audio_path, output_dir, silence_thresh=-40, min_silence_len=120, keep_silence=30):
    """
    Splits an audio file into multiple clips when silence is detected and generates a JSON with timestamps.

    Each clip is written to ``output_dir`` as ``clip_<n>.wav`` and described by an
    entry in ``clips_metadata.json`` (clip number, start/end time in ms, file name).
    ``<n>`` is the index of the silence that ends the clip, so numbers can have
    gaps (for example when the audio starts with silence). If no silence is
    found, the whole audio is saved as ``clip_0.wav``.

    :param input_audio_path: Path to the input audio file
    :param output_dir: Directory to save the output clips (created if missing)
    :param silence_thresh: Silence threshold in dBFS (default: -40 dBFS)
    :param min_silence_len: Minimum silence length to detect in milliseconds (default: 120 ms)
    :param keep_silence: Milliseconds of silence to retain at the beginning and end of each clip (default: 30 ms)
    :return: None
    """
    # Make the function usable on its own, not only from the script entry point.
    os.makedirs(output_dir, exist_ok=True)

    # Load the audio file
    audio = AudioSegment.from_file(input_audio_path)
    audio_len = len(audio)

    # Detect silences in the audio
    silence_ranges = detect_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh)

    # If no silences are detected, save the whole audio as one clip
    if not silence_ranges:
        output_path, metadata = _export_clip(audio, 0, 0, audio_len, output_dir)
        print(f"No silences detected. Entire audio saved as {output_path}")
        _write_clips_metadata([metadata], output_dir)
        return

    # Narrow (not widen) every silence range: the part of the silence that is
    # cut away shrinks, so each clip keeps ``keep_silence`` ms of padding
    # instead of losing that much audible audio at its edges.
    cut_ranges = _trim_silence_ranges(silence_ranges, keep_silence, audio_len)

    # Split audio into clips based on silence
    clips_metadata = []
    start_idx = 0
    for i, (start, end) in enumerate(cut_ranges):
        if start > start_idx:
            output_path, metadata = _export_clip(audio[start_idx:start], i, start_idx, start, output_dir)
            print(f"Saved clip: {output_path}")
            clips_metadata.append(metadata)
        start_idx = end

    # Save the last segment after the final silence
    if start_idx < audio_len:
        output_path, metadata = _export_clip(audio[start_idx:], len(cut_ranges), start_idx, audio_len, output_dir)
        print(f"Saved clip: {output_path}")
        clips_metadata.append(metadata)

    # Save metadata to JSON
    json_path = _write_clips_metadata(clips_metadata, output_dir)
    print(f"Clip metadata saved to {json_path}")


if __name__ == "__main__":
    # Script entry point: split the bundled sample recording into clips.
    source_file, clips_dir = "male_audio.m4a", "audio_clips"

    # The clip directory must exist before any clip is exported into it.
    os.makedirs(clips_dir, exist_ok=True)

    split_audio_on_silence(source_file, clips_dir)