Spaces:
Runtime error
Runtime error
File size: 3,499 Bytes
a27bf5b dd329e3 848bd00 a27bf5b dd329e3 a27bf5b dd329e3 a27bf5b 848bd00 a27bf5b 848bd00 a27bf5b dd329e3 a27bf5b d41861e 848bd00 a27bf5b d41861e a27bf5b dd329e3 a27bf5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
import numpy as np
import librosa
from pydub import AudioSegment
import gradio as gr
def load_audio(file):
    """Load an audio file into a mono float waveform.

    Args:
        file: Either a filesystem path string (what ``gr.Audio(type="filepath")``
            passes to the callback) or a file-like object exposing ``.name``.

    Returns:
        Tuple ``(audio, sr)``: 1-D numpy array of samples in [-1, 1] and the
        file's native sample rate.
    """
    # Gradio's "filepath" input hands us a plain string; legacy file objects
    # expose the path via .name. Support both so the app works either way.
    path = file if isinstance(file, str) else file.name
    # sr=None keeps the native sample rate instead of resampling to 22050 Hz.
    audio, sr = librosa.load(path, sr=None)
    return audio, sr
def get_segment(audio, sr):
    """Select a musically relevant segment (~1.5 s) from the waveform.

    Detects onsets, keeps those whose local energy exceeds a mean+std
    threshold, and returns one second of audio starting at the chosen onset,
    preceded by 0.5 s of context for a smoother transition.

    Args:
        audio: 1-D numpy float waveform.
        sr: Sample rate of ``audio``.

    Returns:
        1-D numpy slice of ``audio`` (never longer than the input).
    """
    hop_length = 512   # librosa's default onset hop (frames -> samples factor)
    win = 2048         # energy analysis window, in samples

    # librosa >= 0.10 requires keyword arguments here (positional y removed).
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr, hop_length=hop_length)
    onset_frames = librosa.onset.onset_detect(
        onset_envelope=onset_env, sr=sr, hop_length=hop_length, backtrack=True
    )

    # If no onsets are detected, fall back to the first second of audio.
    if len(onset_frames) == 0:
        return audio[:sr]

    # Energy per non-overlapping 2048-sample window across the whole signal.
    energy = np.array([np.sum(audio[i:i + win] ** 2) for i in range(0, len(audio), win)])
    # Threshold to consider a segment musically relevant.
    energy_threshold = np.mean(energy) + np.std(energy)

    def _energy_at_frame(f):
        # Onset frames are in hop_length units; convert to an index into the
        # win-sized energy windows (and clamp to avoid running off the end).
        idx = min((f * hop_length) // win, len(energy) - 1)
        return energy[idx]

    # Keep onsets whose surrounding energy exceeds the threshold; fall back
    # to the first detected onset when none qualify.
    relevant_onsets = [f for f in onset_frames if _energy_at_frame(f) > energy_threshold]
    start_frame = relevant_onsets[0] if relevant_onsets else onset_frames[0]

    start_sample = librosa.frames_to_samples(start_frame, hop_length=hop_length)
    segment_length = sr  # 1 second segment
    end_sample = min(start_sample + segment_length, len(audio))

    # Add up to 0.5 s of leading context for smoother transitions.
    overlap_start = max(start_sample - int(sr * 0.5), 0)
    return audio[overlap_start:end_sample]
def extend_music(file, added_minutes):
    """Extend an audio file by appending a repeated, musically relevant segment.

    Args:
        file: Path string or file-like object for the source audio.
        added_minutes: How many minutes of audio to append (>= 0).

    Returns:
        A pydub ``AudioSegment``: the original audio followed by enough
        repetitions of the extracted segment to cover ``added_minutes``.
    """
    audio, sr = load_audio(file)
    # Extract a representative segment from the waveform.
    segment = get_segment(audio, sr)

    # Load the original with pydub for concatenation/export.
    path = file if isinstance(file, str) else file.name
    original_audio_segment = AudioSegment.from_file(path)

    # Convert the float segment ([-1, 1]) to 16-bit mono PCM and wrap it in
    # an AudioSegment — raw numpy bytes cannot be added to an AudioSegment.
    pcm = (np.clip(segment, -1.0, 1.0) * 32767).astype(np.int16)
    segment_audio = AudioSegment(
        pcm.tobytes(),
        frame_rate=int(sr),
        sample_width=2,  # 2 bytes per sample == int16
        channels=1,      # librosa.load returns mono by default
    )

    # Repeat only enough to cover the *added* time (the original code sized
    # this from the whole new duration, massively over-extending the output).
    added_seconds = added_minutes * 60
    seg_seconds = len(segment) / sr
    num_repeats = int(np.ceil(added_seconds / seg_seconds)) if seg_seconds > 0 else 0

    extended_audio = original_audio_segment
    for _ in range(num_repeats):
        extended_audio += segment_audio
    return extended_audio
def process_audio(file, added_minutes):
    """Gradio callback: extend the uploaded audio and export it as MP3.

    Args:
        file: Uploaded audio (path string or file-like object).
        added_minutes: Minutes of audio to append.

    Returns:
        Path to the exported MP3 file.
    """
    output_path = "extended_audio.mp3"
    result = extend_music(file, added_minutes)
    result.export(output_path, format="mp3")
    return output_path
# Create the Gradio interface.
# NOTE: the output gr.Audio must use type="filepath" — "file" is not a valid
# value in current Gradio, and process_audio returns a path to the MP3.
iface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(type="filepath"),  # Uploaded audio, delivered as a path string
        gr.Slider(minimum=0, maximum=10, value=1, label="Additional Minutes")  # Minutes to append
    ],
    outputs=gr.Audio(type="filepath"),  # Path to the extended audio file
    title="Advanced Music Extender",
    description="Upload an audio file, and this app will extend the music by cutting and appending a segment based on advanced audio features. Choose additional minutes to extend the audio duration."
)
iface.launch()
|