import numpy as np
import librosa
from pydub import AudioSegment
import gradio as gr


def load_audio(file_path):
    """Load an audio file at its native sample rate.

    Args:
        file_path: Path to an audio file readable by librosa.

    Returns:
        Tuple ``(audio, sr)`` — mono float samples and the sample rate.
    """
    audio, sr = librosa.load(file_path, sr=None)
    return audio, sr


def get_segment(audio, sr, hop_length=512):
    """Pick a musically relevant segment (~1.5 s) from ``audio``.

    Detects onsets, keeps those whose frame energy stands out
    (mean + 1 std), and returns one second of audio starting at the
    chosen onset plus 0.5 s of leading context.

    Args:
        audio: Mono float samples.
        sr: Sample rate in Hz.
        hop_length: Analysis hop in samples; must match the onset frame
            grid so frame indices map 1:1 onto the energy array.

    Returns:
        A slice of ``audio`` (numpy array).
    """
    # librosa >= 0.10 requires keyword arguments here (positional `y`
    # was removed).
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr, hop_length=hop_length)
    onset_frames = librosa.onset.onset_detect(
        onset_envelope=onset_env, sr=sr, hop_length=hop_length, backtrack=True
    )

    # No onsets detected: fall back to the first second of audio.
    if len(onset_frames) == 0:
        return audio[: min(sr, len(audio))]

    # Per-frame energy on the SAME hop grid as the onset frames, so an
    # onset frame index is a valid index into `energy`.  (The original
    # code used 2048-sample blocks against 512-sample onset frames,
    # which indexed the wrong frame and could run out of bounds.)
    energy = np.array(
        [np.sum(audio[i : i + hop_length] ** 2) for i in range(0, len(audio), hop_length)]
    )

    # Threshold: a frame is "relevant" if its energy exceeds mean + std.
    energy_threshold = np.mean(energy) + np.std(energy)
    relevant_onsets = [
        f for f in onset_frames if f < len(energy) and energy[f] > energy_threshold
    ]

    # Fall back to the first detected onset if none pass the threshold.
    start_frame = relevant_onsets[0] if relevant_onsets else onset_frames[0]
    start_sample = librosa.frames_to_samples(start_frame, hop_length=hop_length)

    segment_length = sr  # 1-second segment
    end_sample = min(start_sample + segment_length, len(audio))

    # 0.5 s of leading context for a smoother transition.
    overlap_start = max(start_sample - int(sr * 0.5), 0)
    return audio[overlap_start:end_sample]


def _segment_to_pydub(samples, sr):
    """Convert float samples in [-1, 1] to a 16-bit mono AudioSegment."""
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
    return AudioSegment(pcm.tobytes(), frame_rate=sr, sample_width=2, channels=1)


def extend_music(file_path, added_minutes):
    """Extend an audio file by repeatedly appending a detected segment.

    Args:
        file_path: Path to the source audio file.
        added_minutes: How many minutes of audio to append.

    Returns:
        A pydub ``AudioSegment`` roughly ``added_minutes`` longer than
        the original.
    """
    audio, sr = load_audio(file_path)
    segment = get_segment(audio, sr)

    original_audio_segment = AudioSegment.from_file(file_path)

    # Repeat enough times to cover the ADDED duration only.  (The
    # original divided the total new duration by the segment length,
    # which over-extended the output by the original track's length.)
    segment_seconds = len(segment) / sr
    added_seconds = added_minutes * 60
    num_repeats = int(added_seconds // segment_seconds) if segment_seconds > 0 else 0

    # Convert the float numpy segment to a real AudioSegment before
    # concatenating.  (The original appended raw float64 bytes to the
    # pydub object, which yields noise/garbage audio.)
    segment_audio = _segment_to_pydub(segment, sr)

    extended_audio = original_audio_segment
    for _ in range(num_repeats):
        extended_audio += segment_audio
    return extended_audio


def process_audio(file, added_minutes):
    """Gradio callback: extend the uploaded file and export it as MP3.

    Returns the path of the exported file for the Audio output widget.
    """
    extended_audio = extend_music(file, added_minutes)
    output_path = "extended_audio.mp3"
    extended_audio.export(output_path, format="mp3")
    return output_path


# Gradio interface: file upload + minutes slider -> extended MP3.
iface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(type="filepath"),
        # Gradio components take `value=` for the initial value;
        # `default=` is not a valid keyword.
        gr.Slider(minimum=0, maximum=10, value=1, label="Additional Minutes"),
    ],
    # "file" is not a valid gr.Audio type; "filepath" matches the
    # string path returned by process_audio.
    outputs=gr.Audio(type="filepath"),
    title="Advanced Music Extender",
    description=(
        "Upload an audio file, and this app will extend the music by "
        "cutting and appending a segment based on advanced audio features. "
        "Choose additional minutes to extend the audio duration."
    ),
)

if __name__ == "__main__":
    iface.launch()