szili2011 committed on
Commit
4908947
·
verified ·
1 Parent(s): af5b23f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -61
app.py CHANGED
@@ -1,26 +1,23 @@
 
1
  import numpy as np
2
  import librosa
3
- from pydub import AudioSegment
4
- import gradio as gr
5
 
 
6
def load_audio(file):
    """Load the audio at path `file` and return (samples, sample_rate).

    `sr=None` tells librosa to keep the file's native sampling rate
    instead of resampling to its 22050 Hz default.
    """
    samples, sample_rate = librosa.load(file, sr=None)
    return samples, sample_rate
10
 
 
11
def get_segment(audio, sr):
    """Select a high-energy excerpt of ~1.5 s from `audio`.

    Detects note onsets, keeps the first onset whose local energy exceeds
    mean + 1 std, and returns the samples from 0.5 s before that onset up
    to 1 s after it (clamped to the signal bounds).

    Parameters
    ----------
    audio : np.ndarray
        Mono signal as returned by librosa.load.
    sr : int
        Sample rate of `audio`.

    Returns
    -------
    np.ndarray
        The selected excerpt; falls back to the first 2048 samples
        (~46 ms at 44.1 kHz) when no onsets are detected.
    """
    # Onset envelope + onset detection (librosa's default hop is 512 samples).
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr)
    onset_frames = librosa.onset.onset_detect(
        onset_envelope=onset_env, sr=sr, backtrack=True
    )

    # No onsets at all: return the opening samples.
    if len(onset_frames) == 0:
        return audio[:2048]

    # Short-time energy over non-overlapping 2048-sample chunks.
    # (The original also ran librosa.piptrack and computed a dynamic range
    # here; both results were unused, so that expensive work is removed.)
    chunk = 2048
    energy = np.array(
        [np.sum(np.abs(audio[i:i + chunk] ** 2)) for i in range(0, len(audio), chunk)]
    )
    energy_threshold = np.mean(energy) + np.std(energy)

    # Onset frames use a 512-sample hop while `energy` is chunked at 2048
    # samples, so convert each frame to its energy-chunk index before
    # testing the threshold. (Indexing energy[frame] directly addressed the
    # wrong chunk and could run past the end of the array.)
    def _energy_chunk(frame):
        return int(librosa.frames_to_samples(frame)) // chunk

    relevant_onsets = [
        f for f in onset_frames
        if _energy_chunk(f) < len(energy) and energy[_energy_chunk(f)] > energy_threshold
    ]

    # Fall back to the first detected onset when none clears the threshold.
    start_frame = relevant_onsets[0] if relevant_onsets else onset_frames[0]
    start_sample = librosa.frames_to_samples(start_frame)

    # One second of material after the onset, clamped to the signal length.
    segment_length = sr
    end_sample = min(start_sample + segment_length, len(audio))

    # Include up to 0.5 s of context before the onset for smoother edits.
    overlap_start = max(start_sample - int(sr * 0.5), 0)
    return audio[overlap_start:end_sample]
53
 
 
54
def extend_music(file, added_minutes):
    """Extend the audio in `file` by roughly `added_minutes` minutes.

    A representative segment is chosen with get_segment() and looped onto
    the end of the original recording.

    Parameters
    ----------
    file : str
        Path to the uploaded audio file.
    added_minutes : float
        How many minutes of looped material to append.

    Returns
    -------
    pydub.AudioSegment
        The original recording with the looped segment appended.
    """
    audio, sr = load_audio(file)

    # Representative segment chosen via onset/energy analysis.
    segment = get_segment(audio, sr)

    # Original recording in pydub form for concatenation/export.
    original_audio_segment = AudioSegment.from_file(file)

    # librosa yields float samples in [-1, 1]; convert to 16-bit mono PCM
    # so pydub interprets the bytes correctly. (The previous code appended
    # raw float32 bytes to an AudioSegment, which decodes as noise.)
    pcm = (np.clip(segment, -1.0, 1.0) * 32767).astype(np.int16)
    segment_audio = AudioSegment(
        data=pcm.tobytes(), sample_width=2, frame_rate=int(sr), channels=1
    )

    # Repeat the segment only for the ADDED time, not the whole new
    # duration, so the extension is approximately `added_minutes` long.
    segment_seconds = len(segment) / sr
    num_repeats = (
        int((added_minutes * 60) // segment_seconds) if segment_seconds > 0 else 0
    )

    # AudioSegment supports `*` for repetition and `+` for concatenation.
    return original_audio_segment + segment_audio * num_repeats
76
 
77
def process_audio(file, added_minutes):
    """Gradio callback: extend the uploaded audio and return it as numpy.

    Parameters
    ----------
    file : str
        Path of the uploaded audio (gr.Audio(type="filepath")).
    added_minutes : float
        Extra minutes requested via the slider.

    Returns
    -------
    tuple[int, np.ndarray]
        (sample_rate, samples) as expected by gr.Audio(type="numpy").
    """
    extended_audio = extend_music(file, added_minutes)

    # Round-trip through an mp3 file so pydub handles the encoding.
    # NOTE(review): pydub's mp3 export requires ffmpeg on PATH — confirm
    # it is available in the deployment image.
    output_path = "extended_audio.mp3"
    extended_audio.export(output_path, format="mp3")

    # Load the exported audio back; keep the sample rate because
    # gr.Audio(type="numpy") expects a (sample_rate, data) tuple —
    # returning the bare array leaves the playback rate undefined.
    audio_output, sr = librosa.load(output_path, sr=None)
    return int(sr), audio_output
87
-
88
# Build the Gradio front end: one audio upload, one slider, one audio output.
iface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(type="filepath"),  # uploaded file is passed as a path
        gr.Slider(minimum=0, maximum=10, value=1, label="Additional Minutes"),
    ],
    outputs=gr.Audio(type="numpy"),  # extended audio returned as numpy
    title="Advanced Music Extender",
    description=(
        "Upload an audio file, and this app will extend the music by "
        "cutting and appending a segment based on advanced audio features. "
        "Choose additional minutes to extend the audio duration."
    ),
)

iface.launch()
 
 
1
+ import gradio as gr
2
  import numpy as np
3
  import librosa
 
 
4
 
5
+ # Function to load audio using librosa
6
def load_audio(file):
    """Return (samples, sample_rate) for the audio at path `file`.

    Passing sr=None keeps the file's native sampling rate rather than
    resampling to librosa's 22050 Hz default.
    """
    return librosa.load(file, sr=None)
10
 
11
+ # Function to get a relevant audio segment based on onset detection
12
def get_segment(audio, sr):
    """Select a ~1 s high-energy excerpt from `audio` via onset detection.

    Detects note onsets, keeps the first onset whose local energy exceeds
    mean + 1 std, and returns one second of samples starting at that onset
    (clamped to the signal bounds).

    Parameters
    ----------
    audio : np.ndarray
        Mono signal as returned by librosa.load.
    sr : int
        Sample rate of `audio`.

    Returns
    -------
    np.ndarray
        The selected excerpt; falls back to the first 2048 samples
        (~46 ms at 44.1 kHz) when no onsets are detected.
    """
    # Onset envelope + onset detection (librosa's default hop is 512 samples).
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr)
    onset_frames = librosa.onset.onset_detect(
        onset_envelope=onset_env, sr=sr, backtrack=True
    )

    # No onsets detected: fall back to the opening samples.
    if len(onset_frames) == 0:
        return audio[:2048]

    # Short-time energy over non-overlapping 2048-sample chunks.
    chunk = 2048
    energy = np.array(
        [np.sum(np.abs(audio[i:i + chunk] ** 2)) for i in range(0, len(audio), chunk)]
    )
    energy_threshold = np.mean(energy) + np.std(energy)

    # Onset frames use a 512-sample hop while `energy` is chunked at 2048
    # samples, so map each frame to its energy-chunk index before comparing
    # against the threshold. (The previous direct energy[f] lookup addressed
    # the wrong chunk; its bounds guard only hid the mismatch.)
    def _chunk_index(frame):
        return int(librosa.frames_to_samples(frame)) // chunk

    relevant_onsets = [
        f for f in onset_frames
        if _chunk_index(f) < len(energy) and energy[_chunk_index(f)] > energy_threshold
    ]

    # Fall back to the first onset when none clears the threshold.
    start_frame = relevant_onsets[0] if relevant_onsets else onset_frames[0]
    start_sample = librosa.frames_to_samples(start_frame)

    # One second of samples, clamped to the signal length.
    segment_length = sr
    end_sample = min(start_sample + segment_length, len(audio))

    return audio[start_sample:end_sample]
 
 
 
 
 
 
40
 
41
+ # Function to extend music by adding silence
42
def extend_music(file, added_minutes):
    """Gradio callback: pad a chosen segment of the upload with silence.

    Parameters
    ----------
    file : str
        Path of the uploaded audio (gr.Audio(type="filepath")).
    added_minutes : float
        Minutes of silence to append, from the slider.

    Returns
    -------
    tuple[int, np.ndarray]
        (sample_rate, samples) — the tuple form gr.Audio(type="numpy")
        requires; returning a bare array leaves the playback rate
        undefined in the UI.

    NOTE(review): only the ~1 s segment chosen by get_segment() is kept —
    the rest of the uploaded audio is discarded before the silence is
    appended. Confirm that this, rather than extending the full track,
    is the intended behavior.
    """
    audio, sr = load_audio(file)

    # Representative segment chosen via onset/energy analysis.
    segment = get_segment(audio, sr)

    # added_minutes of silence, expressed in samples at the native rate.
    additional_samples = int(added_minutes * 60 * sr)
    extended_audio = np.concatenate([segment, np.zeros(additional_samples)])

    return int(sr), extended_audio
53
 
54
+ # Gradio UI setup
55
# Gradio UI: upload a file, choose extra minutes, get the extended audio back.
with gr.Blocks() as app:
    gr.Markdown("# Audio Extender")

    # NOTE(review): the `source=` keyword was removed in Gradio 4
    # (replaced by `sources=[...]`); this call assumes a Gradio 3.x
    # runtime — confirm the pinned version.
    audio_input = gr.Audio(source="upload", type="filepath", label="Upload Audio File")
    added_minutes = gr.Slider(minimum=0, maximum=10, label="Additional Minutes")
    audio_output = gr.Audio(type="numpy", label="Extended Audio")

    submit_button = gr.Button("Extend Audio")
    submit_button.click(
        extend_music,
        inputs=[audio_input, added_minutes],
        outputs=audio_output,
    )

# Start the web server.
app.launch()