szili2011 committed
Commit a27bf5b · verified · 1 Parent(s): cd8dc73

Update app.py

Files changed (1): app.py +85 -19
app.py CHANGED
@@ -1,28 +1,94 @@
+ import numpy as np
+ import librosa
+ from pydub import AudioSegment
  import gradio as gr
- from audiocraft.models import MusicGen
-
- # Load the MusicGen model
- model = MusicGen.from_pretrained("facebook/musicgen-medium")
-
- def generate_music(prompt):
-     # Generate music based on the prompt
-     generated_audio = model.generate(prompt, max_length=30)  # Adjust the length as needed
-     return generated_audio
-
- def process_song(input_audio):
-     # Process the input audio file and generate extended music
-     prompt = "Generate a continuation for the song based on its style."  # Example prompt
-     extended_music = generate_music(prompt)
-     return extended_music
+
+ def load_audio(file_path):
+     audio, sr = librosa.load(file_path, sr=None)
+     return audio, sr
+
+ def get_segment(audio, sr):
+     # Calculate onset envelope and detect onsets
+     onset_env = librosa.onset.onset_strength(y=audio, sr=sr)
+     onset_frames = librosa.onset.onset_detect(onset_envelope=onset_env, sr=sr, backtrack=True)
+
+     # If no onsets are detected, fall back to the first second of audio
+     if len(onset_frames) == 0:
+         return audio[:sr]
+
+     # Extract the pitch and dynamic range (computed here but not used below)
+     pitches, magnitudes = librosa.piptrack(y=audio, sr=sr)
+     dynamic_range = np.max(audio) - np.min(audio)
+
+     # Calculate energy over non-overlapping 2048-sample blocks
+     energy = np.array([np.sum(np.abs(audio[i:i + 2048]**2)) for i in range(0, len(audio), 2048)])
+
+     # Threshold to consider a segment musically relevant
+     energy_threshold = np.mean(energy) + np.std(energy)
+
+     # Find onsets whose energy block exceeds the threshold; onset frames use
+     # librosa's default hop of 512 samples, so map them to the 2048-sample
+     # energy blocks before indexing
+     relevant_onsets = [f for f in onset_frames
+                        if energy[min(f * 512 // 2048, len(energy) - 1)] > energy_threshold]
+
+     # If no relevant onsets are found, fall back to the first detected onset
+     if len(relevant_onsets) == 0:
+         start_frame = onset_frames[0]
+     else:
+         start_frame = relevant_onsets[0]
+
+     start_sample = librosa.frames_to_samples(start_frame)
+
+     # Define a segment length in samples (1 second)
+     segment_length = sr
+     end_sample = min(start_sample + segment_length, len(audio))
+
+     # Optionally add overlap before the onset for smoother transitions
+     overlap_length = int(sr * 0.5)  # 0.5 seconds of leading context
+     overlap_start = max(start_sample - overlap_length, 0)
+
+     # Return the selected segment with some leading context
+     return audio[overlap_start:end_sample]
+
+ def extend_music(file_path, added_minutes):
+     audio, sr = load_audio(file_path)
+
+     # Extract a segment from the audio
+     segment = get_segment(audio, sr)
+
+     # Convert the original audio to pydub format for manipulation
+     original_audio_segment = AudioSegment.from_file(file_path)
+
+     # librosa loads mono float32 in [-1, 1]; convert the segment to 16-bit PCM
+     # so pydub can concatenate it with the original audio
+     segment_int16 = (np.clip(segment, -1.0, 1.0) * 32767).astype(np.int16)
+     segment_audio = AudioSegment(
+         data=segment_int16.tobytes(), sample_width=2, frame_rate=sr, channels=1
+     )
+
+     # Repeat the segment often enough to cover the requested extra minutes
+     num_repeats = int((added_minutes * 60) // (len(segment) / sr))
+
+     # Create the extended audio by appending the repeated segment
+     extended_audio = original_audio_segment
+     if num_repeats > 0:
+         extended_audio = extended_audio + segment_audio * num_repeats
+
+     return extended_audio
+
+ def process_audio(file, added_minutes):
+     extended_audio = extend_music(file, added_minutes)
+     output_path = "extended_audio.mp3"
+     extended_audio.export(output_path, format="mp3")
+     return output_path

  # Create the Gradio interface
  iface = gr.Interface(
-     fn=process_song,
-     inputs=gr.inputs.Audio(label="Upload Your Song", type="filepath"),
-     outputs=gr.outputs.Audio(label="Extended Song"),
-     title="MusicGen Song Extender",
-     description="Upload a song to extend it using MusicGen. The model generates additional music based on the input."
+     fn=process_audio,
+     inputs=[
+         gr.Audio(sources=["upload"], type="filepath"),
+         gr.Slider(minimum=0, maximum=10, value=1, label="Additional Minutes"),  # Adjust max as needed
+     ],
+     outputs=gr.Audio(type="filepath"),
+     title="Advanced Music Extender",
+     description="Upload an audio file, and this app will extend the music by cutting and appending a segment chosen by onset and energy analysis. Choose additional minutes to extend the audio duration."
  )

- if __name__ == "__main__":
-     iface.launch()
+ iface.launch()
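
The new pipeline can be sanity-checked without the Gradio UI. Below is a minimal smoke-test sketch; since app.py calls iface.launch() at module level, these lines would be placed just above that call rather than in a separate script. The file name "song.mp3" and the two-minute extension are illustrative assumptions, not part of the commit.

# Hypothetical smoke test: place just above iface.launch() in app.py.
# "song.mp3" is an assumed local test file.
audio, sr = load_audio("song.mp3")
segment = get_segment(audio, sr)
print(f"Selected segment: {len(segment) / sr:.2f}s at {sr} Hz")

# Extend the track by 2 minutes and write extended_audio.mp3
print("Wrote:", process_audio("song.mp3", added_minutes=2))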