Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,28 +1,94 @@
|
|
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
-
from audiocraft.models import MusicGen
|
3 |
|
4 |
-
|
5 |
-
|
|
|
6 |
|
7 |
-
def
|
8 |
-
#
|
9 |
-
|
10 |
-
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
# Create the Gradio interface
|
19 |
iface = gr.Interface(
|
20 |
-
fn=
|
21 |
-
inputs=
|
22 |
-
|
23 |
-
|
24 |
-
|
|
|
|
|
|
|
25 |
)
|
26 |
|
27 |
-
|
28 |
-
iface.launch()
|
|
|
1 |
+
import numpy as np
|
2 |
+
import librosa
|
3 |
+
from pydub import AudioSegment
|
4 |
import gradio as gr
|
|
|
5 |
|
6 |
+
def load_audio(file_path):
    """Load an audio file at its native sampling rate.

    Parameters:
        file_path: path to the audio file on disk.

    Returns:
        Tuple of (samples, sample_rate) as produced by ``librosa.load``
        with ``sr=None`` (no resampling).
    """
    return librosa.load(file_path, sr=None)
|
9 |
|
10 |
+
def get_segment(audio, sr):
    """Pick a musically relevant ~1 second segment from `audio`.

    Detects onsets, keeps those that land in high-energy regions, and
    returns one second of audio starting at the chosen onset plus 0.5 s
    of leading context for smoother transitions.

    Parameters:
        audio: 1-D float sample array (as returned by ``librosa.load``).
        sr: sampling rate of `audio` in Hz.

    Returns:
        1-D numpy array with the selected samples.
    """
    # librosa >= 0.10 requires the audio to be passed as keyword `y=`;
    # the previous positional call raised a TypeError at runtime.
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr)
    onset_frames = librosa.onset.onset_detect(
        onset_envelope=onset_env, sr=sr, backtrack=True
    )

    # No onsets detected: fall back to the first second of audio.
    # (The old fallback returned 2048 samples while claiming "1 second".)
    if len(onset_frames) == 0:
        return audio[:sr]

    # Energy per fixed-size chunk over time.
    chunk = 2048
    energy = np.array(
        [np.sum(np.abs(audio[i:i + chunk] ** 2)) for i in range(0, len(audio), chunk)]
    )

    # Threshold above which a chunk counts as musically relevant.
    energy_threshold = np.mean(energy) + np.std(energy)

    def _energy_at(frame):
        # Map an onset *frame* (default hop length 512) to the energy
        # chunk that contains it. The original code indexed `energy`
        # directly with the frame number, which mixes units and could
        # run past the end of the array.
        idx = librosa.frames_to_samples(frame) // chunk
        return energy[min(idx, len(energy) - 1)]

    # Onsets that land in high-energy chunks.
    relevant_onsets = [f for f in onset_frames if _energy_at(f) > energy_threshold]

    # Fall back to the first detected onset if none pass the threshold.
    start_frame = relevant_onsets[0] if relevant_onsets else onset_frames[0]
    start_sample = librosa.frames_to_samples(start_frame)

    # One-second segment, clipped to the end of the audio.
    end_sample = min(start_sample + sr, len(audio))

    # Include 0.5 s of leading context for a smoother transition.
    overlap_start = max(start_sample - int(sr * 0.5), 0)

    return audio[overlap_start:end_sample]
|
52 |
+
|
53 |
+
def extend_music(file_path, added_minutes):
    """Extend an audio file by appending a detected segment repeatedly.

    Parameters:
        file_path: path to the input audio file.
        added_minutes: how many minutes of audio to append.

    Returns:
        A pydub ``AudioSegment`` containing the original audio followed
        by the repeated segment.
    """
    audio, sr = load_audio(file_path)

    # Pick a representative segment from the audio.
    segment = get_segment(audio, sr)

    # Load the original file with pydub for concatenation and export.
    original_audio_segment = AudioSegment.from_file(file_path)

    # Guard against an empty segment (e.g. degenerate input).
    segment_seconds = len(segment) / sr
    if segment_seconds <= 0:
        return original_audio_segment

    # Repeats needed to cover the *added* time. The original divided the
    # total new duration by the segment length, which appended far more
    # audio than the user asked for.
    num_repeats = int((added_minutes * 60) // segment_seconds)

    # librosa yields float samples in [-1, 1]; convert to 16-bit PCM and
    # wrap in an AudioSegment. The original appended raw float bytes
    # (`+ segment.tobytes()`), which is not a valid pydub operand.
    pcm = (np.clip(segment, -1.0, 1.0) * 32767).astype(np.int16)
    segment_audio = AudioSegment(
        pcm.tobytes(), frame_rate=int(sr), sample_width=2, channels=1
    )

    extended_audio = original_audio_segment
    for _ in range(num_repeats):
        extended_audio = extended_audio + segment_audio

    return extended_audio
|
75 |
+
|
76 |
+
def process_audio(file, added_minutes):
    """Gradio callback: extend the uploaded audio and export it as MP3.

    Parameters:
        file: path of the uploaded audio file.
        added_minutes: minutes of audio to append.

    Returns:
        Path of the exported MP3 file.
    """
    output_path = "extended_audio.mp3"
    result = extend_music(file, added_minutes)
    result.export(output_path, format="mp3")
    return output_path
|
81 |
|
82 |
# Create the Gradio interface.
# NOTE: gr.inputs / gr.outputs, `source=`, `default=`, and output
# type="file" were removed in Gradio 3/4; the current top-level
# components and argument names are used instead.
iface = gr.Interface(
    fn=process_audio,
    inputs=[
        gr.Audio(sources=["upload"], type="filepath"),
        gr.Slider(minimum=0, maximum=10, value=1, label="Additional Minutes"),  # Adjust max as needed
    ],
    outputs=gr.Audio(type="filepath"),
    title="Advanced Music Extender",
    description="Upload an audio file, and this app will extend the music by cutting and appending a segment based on advanced audio features. Choose additional minutes to extend the audio duration."
)

iface.launch()
|
|