thecollabagepatch commited on
Commit
da99657
1 Parent(s): 2907ea9

fixing continue_music

Browse files

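continue_music used to append each generated clip to the end of the original audio. the generated clip already starts with the prompt (the last prompt_duration seconds of the input), so that section was played twice, killin them seamless vibes. the generated clip now replaces the prompt section instead of being appended after it.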

no idea how i managed to generate seamlessly several times before noticing. i may be too good at jamming with gary for my demos to be useful 😂

Files changed (1) hide show
  1. app.py +315 -308
app.py CHANGED
@@ -1,309 +1,316 @@
1
- import gradio as gr
2
- from musiclang_predict import MusicLangPredictor
3
- import random
4
- import subprocess
5
- import os
6
- import torchaudio
7
- import torch
8
- import numpy as np
9
- from audiocraft.models import MusicGen
10
- from audiocraft.data.audio import audio_write
11
- from pydub import AudioSegment
12
- import spaces
13
- import tempfile
14
- from pydub import AudioSegment
15
-
16
- # Check if CUDA is available
17
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
18
-
19
- # Utility Functions
20
- def peak_normalize(y, target_peak=0.97):
21
- return target_peak * (y / np.max(np.abs(y)))
22
-
23
- def rms_normalize(y, target_rms=0.05):
24
- return y * (target_rms / np.sqrt(np.mean(y**2)))
25
-
26
- def preprocess_audio(waveform):
27
- waveform_np = waveform.cpu().squeeze().numpy() # Move to CPU before converting to NumPy
28
- # processed_waveform_np = rms_normalize(peak_normalize(waveform_np))
29
- return torch.from_numpy(waveform_np).unsqueeze(0).to(device)
30
-
31
- def create_slices(song, sr, slice_duration, bpm, num_slices=5):
32
- song_length = song.shape[-1] / sr
33
- slices = []
34
-
35
- # Ensure the first slice is from the beginning of the song
36
- first_slice_waveform = song[..., :int(slice_duration * sr)]
37
- slices.append(first_slice_waveform)
38
-
39
- for i in range(1, num_slices):
40
- possible_start_indices = list(range(int(slice_duration * sr), int(song_length * sr), int(4 * 60 / bpm * sr)))
41
- if not possible_start_indices:
42
- # If there are no valid start indices, duplicate the first slice
43
- slices.append(first_slice_waveform)
44
- continue
45
-
46
- random_start = random.choice(possible_start_indices)
47
- slice_end = random_start + int(slice_duration * sr)
48
-
49
- if slice_end > song_length * sr:
50
- # Wrap around to the beginning of the song
51
- remaining_samples = int(slice_end - song_length * sr)
52
- slice_waveform = torch.cat([song[..., random_start:], song[..., :remaining_samples]], dim=-1)
53
- else:
54
- slice_waveform = song[..., random_start:slice_end]
55
-
56
- if len(slice_waveform.squeeze()) < int(slice_duration * sr):
57
- additional_samples_needed = int(slice_duration * sr) - len(slice_waveform.squeeze())
58
- slice_waveform = torch.cat([slice_waveform, song[..., :additional_samples_needed]], dim=-1)
59
-
60
- slices.append(slice_waveform)
61
-
62
- return slices
63
-
64
- def calculate_duration(bpm, min_duration=29, max_duration=30):
65
- single_bar_duration = 4 * 60 / bpm
66
- bars = max(min_duration // single_bar_duration, 1)
67
-
68
- while single_bar_duration * bars < min_duration:
69
- bars += 1
70
-
71
- duration = single_bar_duration * bars
72
-
73
- while duration > max_duration and bars > 1:
74
- bars -= 1
75
- duration = single_bar_duration * bars
76
-
77
- return duration
78
-
79
- @spaces.GPU(duration=60)
80
- def generate_midi(seed, use_chords, chord_progression, bpm):
81
- if seed == "":
82
- seed = random.randint(1, 10000)
83
-
84
- ml = MusicLangPredictor('musiclang/musiclang-v2')
85
-
86
- try:
87
- seed = int(seed)
88
- except ValueError:
89
- seed = random.randint(1, 10000)
90
-
91
- nb_tokens = 1024
92
- temperature = 0.9
93
- top_p = 1.0
94
-
95
- if use_chords and chord_progression.strip():
96
- score = ml.predict_chords(
97
- chord_progression,
98
- time_signature=(4, 4),
99
- temperature=temperature,
100
- topp=top_p,
101
- rng_seed=seed
102
- )
103
- else:
104
- score = ml.predict(
105
- nb_tokens=nb_tokens,
106
- temperature=temperature,
107
- topp=top_p,
108
- rng_seed=seed
109
- )
110
-
111
- midi_filename = f"output_{seed}.mid"
112
- wav_filename = midi_filename.replace(".mid", ".wav")
113
-
114
- score.to_midi(midi_filename, tempo=bpm, time_signature=(4, 4))
115
-
116
- subprocess.run(["fluidsynth", "-ni", "font.sf2", midi_filename, "-F", wav_filename, "-r", "44100"])
117
-
118
- # Clean up temporary MIDI file
119
- os.remove(midi_filename)
120
-
121
- sample_rate = 44100 # Assuming fixed sample rate from fluidsynth command
122
- return wav_filename
123
-
124
- @spaces.GPU(duration=90)
125
- def generate_music(wav_filename, prompt_duration, musicgen_model, num_iterations, bpm):
126
- # Load the audio from the passed file path
127
- song, sr = torchaudio.load(wav_filename)
128
- song = song.to(device)
129
- # Use the user-provided BPM value for duration calculation
130
- duration = calculate_duration(bpm)
131
-
132
- # Create slices from the song using the user-provided BPM value
133
- slices = create_slices(song, sr, 35, bpm, num_slices=5)
134
-
135
- # Load the model
136
- model_name = musicgen_model.split(" ")[0]
137
- model_continue = MusicGen.get_pretrained(model_name)
138
-
139
- # Setting generation parameters
140
- model_continue.set_generation_params(
141
- use_sampling=True,
142
- top_k=250,
143
- top_p=0.0,
144
- temperature=1.0,
145
- duration=duration,
146
- cfg_coef=3
147
- )
148
-
149
- all_audio_files = []
150
-
151
- for i in range(num_iterations):
152
- slice_idx = i % len(slices)
153
-
154
- print(f"Running iteration {i + 1} using slice {slice_idx}...")
155
-
156
- prompt_waveform = slices[slice_idx][..., :int(prompt_duration * sr)]
157
- prompt_waveform = preprocess_audio(prompt_waveform)
158
-
159
- output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
160
- output = output.cpu() # Move the output tensor back to CPU
161
-
162
- # Make sure the output tensor has at most 2 dimensions
163
- if len(output.size()) > 2:
164
- output = output.squeeze()
165
-
166
- filename_without_extension = f'continue_{i}'
167
- filename_with_extension = f'{filename_without_extension}.wav'
168
-
169
- audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
170
- all_audio_files.append(f'{filename_without_extension}.wav.wav') # Assuming the library appends an extra .wav
171
-
172
- # Combine all audio files
173
- combined_audio = AudioSegment.empty()
174
- for filename in all_audio_files:
175
- combined_audio += AudioSegment.from_wav(filename)
176
-
177
- combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
178
- combined_audio.export(combined_audio_filename, format="mp3")
179
-
180
- # Clean up temporary files
181
- for filename in all_audio_files:
182
- os.remove(filename)
183
-
184
- return combined_audio_filename
185
-
186
- @spaces.GPU(duration=90)
187
- def continue_music(input_audio_path, prompt_duration, musicgen_model, num_iterations, bpm):
188
- # Load the audio from the given file path
189
- song, sr = torchaudio.load(input_audio_path)
190
- song = song.to(device)
191
-
192
- # Calculate the slice from the end of the song based on prompt_duration
193
- num_samples = int(prompt_duration * sr)
194
- if song.shape[-1] < num_samples:
195
- raise ValueError("The prompt_duration is longer than the audio length.")
196
- start_idx = song.shape[-1] - num_samples
197
- prompt_waveform = song[..., start_idx:]
198
-
199
- # Prepare the audio slice for generation
200
- prompt_waveform = preprocess_audio(prompt_waveform)
201
-
202
- # Load the model and set generation parameters
203
- model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
204
- model_continue.set_generation_params(
205
- use_sampling=True,
206
- top_k=250,
207
- top_p=0.0,
208
- temperature=1.0,
209
- duration=calculate_duration(bpm),
210
- cfg_coef=3
211
- )
212
-
213
- original_audio = AudioSegment.from_mp3(input_audio_path)
214
- all_audio_files = [original_audio] # Start with the original audio
215
- file_paths_for_cleanup = [] # List to track generated file paths for cleanup
216
-
217
- for i in range(num_iterations):
218
- output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
219
- output = output.cpu() # Move the output tensor back to CPU
220
- if len(output.size()) > 2:
221
- output = output.squeeze()
222
-
223
- filename_without_extension = f'continue_{i}'
224
- filename_with_extension = f'{filename_without_extension}.wav'
225
- correct_filename_extension = f'{filename_without_extension}.wav.wav' # Apply the workaround for audio_write
226
-
227
- audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
228
- new_audio_segment = AudioSegment.from_wav(correct_filename_extension)
229
- all_audio_files.append(new_audio_segment)
230
- file_paths_for_cleanup.append(correct_filename_extension) # Add to cleanup list
231
-
232
- # Combine all audio files into one continuous segment
233
- combined_audio = sum(all_audio_files)
234
-
235
- combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
236
- combined_audio.export(combined_audio_filename, format="mp3")
237
-
238
- # Clean up temporary files using the list of file paths
239
- for file_path in file_paths_for_cleanup:
240
- os.remove(file_path)
241
-
242
- return combined_audio_filename
243
-
244
-
245
-
246
- # Define the expandable sections
247
- musiclang_blurb = """
248
- ## musiclang
249
- musiclang is a controllable ai midi model. it can generate midi sequences based on user-provided parameters, or unconditionally.
250
- [<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" width="20" style="vertical-align:middle"> musiclang github](https://github.com/MusicLang/musiclang_predict)
251
- [<img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Hugging Face" width="20" style="vertical-align:middle"> musiclang huggingface space](https://huggingface.co/spaces/musiclang/musiclang-predict)
252
- """
253
-
254
- musicgen_blurb = """
255
- ## musicgen
256
- musicgen is a transformer-based music model that generates audio. It can also do something called a continuation, which was initially meant to extend musicgen outputs beyond 30 seconds. it can be used with any input audio to produce surprising results.
257
- [<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" width="20" style="vertical-align:middle"> audiocraft github](https://github.com/facebookresearch/audiocraft)
258
- visit https://thecollabagepatch.com/infinitepolo.mp3 or https://thecollabagepatch.com/audiocraft.mp3 to hear continuations in action.
259
- see also https://youtube.com/@thecollabagepatch
260
- """
261
-
262
- finetunes_blurb = """
263
- ## fine-tuned models
264
- the fine-tunes hosted on the huggingface hub are provided collectively by the musicgen discord community. thanks to vanya, mj, hoenn, septicDNB and of course, lyra.
265
- [<img src="https://cdn.iconscout.com/icon/free/png-256/discord-3691244-3073764.png" alt="Discord" width="20" style="vertical-align:middle"> musicgen discord](https://discord.gg/93kX8rGZ)
266
- [<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" style="vertical-align:middle"> fine-tuning colab notebook by lyra](https://colab.research.google.com/drive/13tbcC3A42KlaUZ21qvUXd25SFLu8WIvb)
267
- """
268
-
269
- # Create the Gradio interface
270
- with gr.Blocks() as iface:
271
- gr.Markdown("# the-slot-machine")
272
- gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
273
- gr.Markdown("this is a musical slot machine. using musiclang, we get a midi output. then, we let a musicgen model continue, semi-randomly, from different sections of the midi track. the slot machine combines em all at the end into something very bizarre. pick a number for the seed between 1 and 10k, or leave it blank to unlock the full rnjesus powers. if you wanna be lame, you can control the chord progression, prompt duration, musicgen model, number of iterations, and BPM.")
274
-
275
- with gr.Accordion("more info", open=False):
276
- gr.Markdown(musiclang_blurb)
277
- gr.Markdown(musicgen_blurb)
278
- gr.Markdown(finetunes_blurb)
279
-
280
- with gr.Row():
281
- with gr.Column():
282
- seed = gr.Textbox(label="Seed (leave blank for random)", value="")
283
- use_chords = gr.Checkbox(label="Control Chord Progression", value=False)
284
- chord_progression = gr.Textbox(label="Chord Progression (e.g., Am CM Dm E7 Am)", visible=True)
285
- bpm = gr.Slider(label="BPM", minimum=60, maximum=200, step=1, value=120)
286
- generate_midi_button = gr.Button("Generate MIDI")
287
- midi_audio = gr.Audio(label="Generated MIDI Audio", type="filepath") # Ensure this is set to handle file paths
288
-
289
- with gr.Column():
290
- prompt_duration = gr.Dropdown(label="Prompt Duration (seconds)", choices=list(range(1, 11)), value=5)
291
- musicgen_model = gr.Dropdown(label="MusicGen Model", choices=[
292
- "thepatch/vanya_ai_dnb_0.1 (small)",
293
- "thepatch/budots_remix (small)",
294
- "thepatch/PhonkV2 (small)",
295
- "thepatch/bleeps-medium (medium)",
296
- "thepatch/hoenn_lofi (large)"
297
- ], value="thepatch/vanya_ai_dnb_0.1 (small)")
298
- num_iterations = gr.Slider(label="this does nothing rn", minimum=1, maximum=1, step=1, value=1)
299
- generate_music_button = gr.Button("Generate Music")
300
- output_audio = gr.Audio(label="Generated Music", type="filepath")
301
- continue_button = gr.Button("Continue Generating Music")
302
- continue_output_audio = gr.Audio(label="Continued Music Output", type="filepath")
303
-
304
- # Connecting the components
305
- generate_midi_button.click(generate_midi, inputs=[seed, use_chords, chord_progression, bpm], outputs=[midi_audio])
306
- generate_music_button.click(generate_music, inputs=[midi_audio, prompt_duration, musicgen_model, num_iterations, bpm], outputs=[output_audio])
307
- continue_button.click(continue_music, inputs=[output_audio, prompt_duration, musicgen_model, num_iterations, bpm], outputs=continue_output_audio)
308
-
 
 
 
 
 
 
 
309
  iface.launch()
 
1
+ import gradio as gr
2
+ from musiclang_predict import MusicLangPredictor
3
+ import random
4
+ import subprocess
5
+ import os
6
+ import torchaudio
7
+ import torch
8
+ import numpy as np
9
+ from audiocraft.models import MusicGen
10
+ from audiocraft.data.audio import audio_write
11
+ from pydub import AudioSegment
12
+ import spaces
13
+ import tempfile
14
+ from pydub import AudioSegment
15
+
16
# Pick the compute device once at import time: CUDA when available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
18
+
19
+ # Utility Functions
20
def peak_normalize(y, target_peak=0.97):
    """Scale ``y`` so that its largest absolute sample equals ``target_peak``.

    Args:
        y: NumPy array of audio samples.
        target_peak: Absolute peak value of the returned signal.

    Returns:
        The rescaled samples. Empty or all-zero input is returned unchanged:
        there is no peak to scale from, and dividing by a zero peak would
        fill the result with NaN.
    """
    if np.size(y) == 0:
        return y
    peak = np.max(np.abs(y))
    if peak == 0:
        return y
    return target_peak * (y / peak)
22
+
23
def rms_normalize(y, target_rms=0.05):
    """Scale ``y`` so that its root-mean-square level equals ``target_rms``.

    Args:
        y: NumPy array of audio samples.
        target_rms: RMS level of the returned signal.

    Returns:
        The rescaled samples. Empty or all-zero input is returned unchanged,
        because a zero RMS cannot be scaled and would otherwise produce NaN.
    """
    if np.size(y) == 0:
        return y
    rms = np.sqrt(np.mean(y**2))
    if rms == 0:
        return y
    return y * (target_rms / rms)
25
+
26
def preprocess_audio(waveform):
    """Prepare a waveform tensor for generation.

    The tensor is copied to the CPU, stripped of size-1 dimensions, passed
    through NumPy, given a new leading dimension and moved to the module-level
    ``device``. No normalization is applied here (the peak/RMS normalization
    step is currently disabled).

    Args:
        waveform: Audio tensor; may live on any device.

    Returns:
        A tensor on ``device`` with one extra leading dimension.
    """
    # NumPy conversion needs host memory, so leave the GPU first.
    samples = waveform.cpu().squeeze().numpy()
    batched = torch.from_numpy(samples).unsqueeze(0)
    return batched.to(device)
30
+
31
def create_slices(song, sr, slice_duration, bpm, num_slices=5):
    """Cut ``num_slices`` excerpts of ``slice_duration`` seconds out of ``song``.

    The first slice always starts at the beginning of the song. Every other
    slice starts at a random offset that lies a whole number of 4-beat bars
    after the end of the first slice; a slice that runs past the end of the
    song wraps around to the beginning.

    Args:
        song: Audio tensor whose last dimension is time (samples).
        sr: Sample rate of ``song`` in Hz.
        slice_duration: Length of each slice in seconds.
        bpm: Tempo used to space the candidate start offsets one bar apart.
        num_slices: Number of slices to return.

    Returns:
        A list of ``num_slices`` tensors. When the song is not longer than one
        slice there are no candidate offsets and the first slice is repeated.
    """
    total_samples = song.shape[-1]
    slice_samples = int(slice_duration * sr)
    # One bar in samples; clamp to 1 so range() never receives a zero step.
    bar_samples = max(int(4 * 60 / bpm * sr), 1)

    first_slice = song[..., :slice_samples]
    slices = [first_slice]

    # The candidates do not depend on the loop variable, so build them once.
    candidate_starts = range(slice_samples, total_samples, bar_samples)

    for _ in range(1, num_slices):
        if not candidate_starts:
            # Nothing beyond the first slice to pick from: reuse it.
            slices.append(first_slice)
            continue

        start = random.choice(candidate_starts)
        end = start + slice_samples

        if end > total_samples:
            # Wrap around to the beginning of the song.
            piece = torch.cat(
                [song[..., start:], song[..., :end - total_samples]], dim=-1
            )
        else:
            piece = song[..., start:end]

        # Measure along the time axis. len(tensor.squeeze()) would return the
        # channel count for stereo audio and trigger bogus padding.
        shortfall = slice_samples - piece.shape[-1]
        if shortfall > 0:
            piece = torch.cat([piece, song[..., :shortfall]], dim=-1)

        slices.append(piece)

    return slices
63
+
64
def calculate_duration(bpm, min_duration=29, max_duration=30):
    """Return a duration in seconds that is a whole number of 4-beat bars.

    Starts from the number of bars that fit in ``min_duration``, adds bars
    until ``min_duration`` is reached, then removes bars (never going below
    one) while the result exceeds ``max_duration``.

    Args:
        bpm: Tempo in beats per minute.
        min_duration: Lower target for the duration, in seconds.
        max_duration: Upper limit for the duration, in seconds.

    Returns:
        The bar-aligned duration in seconds.
    """
    bar_seconds = 4 * 60 / bpm
    bar_count = max(min_duration // bar_seconds, 1)

    # Grow until the minimum is covered.
    while bar_seconds * bar_count < min_duration:
        bar_count += 1

    # Shrink back under the maximum, keeping at least one bar.
    total = bar_seconds * bar_count
    while total > max_duration and bar_count > 1:
        bar_count -= 1
        total = bar_seconds * bar_count

    return total
78
+
79
@spaces.GPU(duration=60)
def generate_midi(seed, use_chords, chord_progression, bpm):
    """Generate a MIDI score with MusicLang and render it to a WAV file.

    Args:
        seed: Seed for the predictor. Blank, missing or non-numeric values are
            replaced by a random integer between 1 and 10000.
        use_chords: When true and ``chord_progression`` is non-blank, the
            score is predicted from that chord progression.
        chord_progression: Chord progression text passed to the predictor.
        bpm: Tempo written into the MIDI file.

    Returns:
        Path of the rendered WAV file (``output_<seed>.wav``).

    Raises:
        subprocess.CalledProcessError: If fluidsynth exits with an error, so a
            path to a WAV file that was never written is not returned.
    """
    # int("") raises ValueError as well, so one handler covers the blank case.
    try:
        seed = int(seed)
    except (TypeError, ValueError):
        seed = random.randint(1, 10000)

    ml = MusicLangPredictor('musiclang/musiclang-v2')

    nb_tokens = 1024
    temperature = 0.9
    top_p = 1.0

    if use_chords and chord_progression and chord_progression.strip():
        score = ml.predict_chords(
            chord_progression,
            time_signature=(4, 4),
            temperature=temperature,
            topp=top_p,
            rng_seed=seed
        )
    else:
        score = ml.predict(
            nb_tokens=nb_tokens,
            temperature=temperature,
            topp=top_p,
            rng_seed=seed
        )

    midi_filename = f"output_{seed}.mid"
    wav_filename = midi_filename.replace(".mid", ".wav")

    score.to_midi(midi_filename, tempo=bpm, time_signature=(4, 4))

    try:
        # Render at a fixed 44100 Hz sample rate.
        subprocess.run(
            ["fluidsynth", "-ni", "font.sf2", midi_filename, "-F", wav_filename, "-r", "44100"],
            check=True,
        )
    finally:
        # The MIDI file is only an intermediate; remove it even when rendering fails.
        os.remove(midi_filename)

    return wav_filename
123
+
124
@spaces.GPU(duration=90)
def generate_music(wav_filename, prompt_duration, musicgen_model, num_iterations, bpm):
    """Let a MusicGen model continue from slices of a rendered track.

    The track is cut into five 35-second slices. For each iteration the first
    ``prompt_duration`` seconds of one slice (cycling through the slices) are
    used as the prompt for ``generate_continuation``. All generated clips are
    concatenated and exported as a single MP3.

    Args:
        wav_filename: Path of the audio file to slice prompts from.
        prompt_duration: Prompt length in seconds.
        musicgen_model: Dropdown label; the text before the first space is the
            model name handed to ``MusicGen.get_pretrained``.
        num_iterations: Number of continuations to generate.
        bpm: Tempo used for bar-aligned slicing and the generation duration.

    Returns:
        Path of the combined MP3 file.
    """
    song, sr = torchaudio.load(wav_filename)
    song = song.to(device)

    slices = create_slices(song, sr, 35, bpm, num_slices=5)

    model_name = musicgen_model.split(" ")[0]
    model_continue = MusicGen.get_pretrained(model_name)
    model_continue.set_generation_params(
        use_sampling=True,
        top_k=250,
        top_p=0.0,
        temperature=1.0,
        duration=calculate_duration(bpm),
        cfg_coef=3
    )

    prompt_samples = int(prompt_duration * sr)
    generated_paths = []

    try:
        for i in range(int(num_iterations)):
            slice_idx = i % len(slices)

            print(f"Running iteration {i + 1} using slice {slice_idx}...")

            prompt_waveform = preprocess_audio(slices[slice_idx][..., :prompt_samples])

            output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
            output = output.cpu()  # audio_write works on CPU tensors

            # Make sure the output tensor has at most 2 dimensions
            if output.dim() > 2:
                output = output.squeeze()

            # audio_write appends the ".wav" suffix itself and returns the
            # path it wrote, so use that instead of guessing the file name.
            written_path = audio_write(
                f'continue_{i}',
                output,
                model_continue.sample_rate,
                strategy="loudness",
                loudness_compressor=True,
            )
            generated_paths.append(str(written_path))

        combined_audio = AudioSegment.empty()
        for path in generated_paths:
            combined_audio += AudioSegment.from_wav(path)

        combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
        # export() hands back an open file handle; close it right away.
        combined_audio.export(combined_audio_filename, format="mp3").close()
    finally:
        # Remove the intermediate WAV files even when generation or export fails.
        for path in generated_paths:
            if os.path.exists(path):
                os.remove(path)

    return combined_audio_filename
185
+
186
@spaces.GPU(duration=90)
def continue_music(input_audio_path, prompt_duration, musicgen_model, num_iterations, bpm):
    """Extend an MP3 by letting MusicGen continue from its ending.

    On every iteration the last ``prompt_duration`` seconds of the audio built
    so far are used as the prompt. The generated clip then replaces that
    prompt section (rather than being appended after it), so the prompt is
    not heard twice.

    Args:
        input_audio_path: Path of the MP3 file to extend.
        prompt_duration: Prompt length in seconds.
        musicgen_model: Dropdown label; the text before the first space is the
            model name handed to ``MusicGen.get_pretrained``.
        num_iterations: Number of continuation passes.
        bpm: Tempo used to compute the bar-aligned generation duration.

    Returns:
        Path of the exported MP3 containing the extended audio.

    Raises:
        ValueError: If the current audio is shorter than ``prompt_duration``.
    """
    # Function-scope import: the module's import block does not provide io.
    import io

    model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
    model_continue.set_generation_params(
        use_sampling=True,
        top_k=250,
        top_p=0.0,
        temperature=1.0,
        duration=calculate_duration(bpm),
        cfg_coef=3
    )

    current_audio = AudioSegment.from_mp3(input_audio_path)
    prompt_ms = int(prompt_duration * 1000)  # pydub slices in milliseconds
    generated_paths = []  # temp WAV files to delete afterwards

    try:
        for i in range(int(num_iterations)):
            if len(current_audio) < prompt_ms:
                raise ValueError("The prompt_duration is longer than the current audio length.")

            start_ms = len(current_audio) - prompt_ms
            prompt_audio = current_audio[start_ms:]

            # Hand the prompt to torchaudio through an in-memory WAV. export()
            # writes into the buffer we pass; it does not return raw bytes.
            wav_buffer = io.BytesIO()
            prompt_audio.export(wav_buffer, format="wav")
            wav_buffer.seek(0)
            prompt_waveform, prompt_sr = torchaudio.load(wav_buffer, format="wav")

            prompt_waveform = preprocess_audio(prompt_waveform.to(device))

            output = model_continue.generate_continuation(
                prompt_waveform, prompt_sample_rate=prompt_sr, progress=True
            )
            output = output.cpu()  # audio_write works on CPU tensors

            if output.dim() > 2:
                output = output.squeeze()

            # audio_write appends the ".wav" suffix itself and returns the
            # path it wrote, so use that instead of guessing the file name.
            written_path = str(audio_write(
                f'continue_{i}',
                output,
                model_continue.sample_rate,
                strategy="loudness",
                loudness_compressor=True,
            ))
            generated_paths.append(written_path)
            generated_audio_segment = AudioSegment.from_wav(written_path)

            # Replace the prompt portion with the generated audio
            current_audio = current_audio[:start_ms] + generated_audio_segment

        combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
        # export() hands back an open file handle; close it right away.
        current_audio.export(combined_audio_filename, format="mp3").close()
    finally:
        # Remove the intermediate WAV files even when generation or export fails.
        for file_path in generated_paths:
            if os.path.exists(file_path):
                os.remove(file_path)

    return combined_audio_filename
250
+
251
+
252
+
253
# Markdown shown inside the "more info" accordion. Each blurb starts and ends
# with a newline, exactly as a triple-quoted block would.
musiclang_blurb = "\n".join((
    '',
    '## musiclang',
    'musiclang is a controllable ai midi model. it can generate midi sequences based on user-provided parameters, or unconditionally.',
    '[<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" width="20" style="vertical-align:middle"> musiclang github](https://github.com/MusicLang/musiclang_predict)',
    '[<img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg" alt="Hugging Face" width="20" style="vertical-align:middle"> musiclang huggingface space](https://huggingface.co/spaces/musiclang/musiclang-predict)',
    '',
))

musicgen_blurb = "\n".join((
    '',
    '## musicgen',
    'musicgen is a transformer-based music model that generates audio. It can also do something called a continuation, which was initially meant to extend musicgen outputs beyond 30 seconds. it can be used with any input audio to produce surprising results.',
    '[<img src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" alt="GitHub" width="20" style="vertical-align:middle"> audiocraft github](https://github.com/facebookresearch/audiocraft)',
    'visit https://thecollabagepatch.com/infinitepolo.mp3 or https://thecollabagepatch.com/audiocraft.mp3 to hear continuations in action.',
    'see also https://youtube.com/@thecollabagepatch',
    '',
))

finetunes_blurb = "\n".join((
    '',
    '## fine-tuned models',
    'the fine-tunes hosted on the huggingface hub are provided collectively by the musicgen discord community. thanks to vanya, mj, hoenn, septicDNB and of course, lyra.',
    '[<img src="https://cdn.iconscout.com/icon/free/png-256/discord-3691244-3073764.png" alt="Discord" width="20" style="vertical-align:middle"> musicgen discord](https://discord.gg/93kX8rGZ)',
    '[<img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" style="vertical-align:middle"> fine-tuning colab notebook by lyra](https://colab.research.google.com/drive/13tbcC3A42KlaUZ21qvUXd25SFLu8WIvb)',
    '',
))
275
+
276
# Build the Gradio UI: MIDI controls in the left column, MusicGen controls and
# outputs in the right column, then wire the three buttons to their handlers.
_musicgen_choices = [
    "thepatch/vanya_ai_dnb_0.1 (small)",
    "thepatch/budots_remix (small)",
    "thepatch/PhonkV2 (small)",
    "thepatch/bleeps-medium (medium)",
    "thepatch/hoenn_lofi (large)",
]

with gr.Blocks() as iface:
    gr.Markdown("# the-slot-machine")
    gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
    gr.Markdown("this is a musical slot machine. using musiclang, we get a midi output. then, we let a musicgen model continue, semi-randomly, from different sections of the midi track. the slot machine combines em all at the end into something very bizarre. pick a number for the seed between 1 and 10k, or leave it blank to unlock the full rnjesus powers. if you wanna be lame, you can control the chord progression, prompt duration, musicgen model, number of iterations, and BPM.")

    with gr.Accordion("more info", open=False):
        for blurb in (musiclang_blurb, musicgen_blurb, finetunes_blurb):
            gr.Markdown(blurb)

    with gr.Row():
        # Left column: MusicLang (MIDI) settings and preview.
        with gr.Column():
            seed = gr.Textbox(label="Seed (leave blank for random)", value="")
            use_chords = gr.Checkbox(label="Control Chord Progression", value=False)
            chord_progression = gr.Textbox(label="Chord Progression (e.g., Am CM Dm E7 Am)", visible=True)
            bpm = gr.Slider(label="BPM", minimum=60, maximum=200, step=1, value=120)
            generate_midi_button = gr.Button("Generate MIDI")
            # type="filepath" so the handlers receive a path on disk.
            midi_audio = gr.Audio(label="Generated MIDI Audio", type="filepath")

        # Right column: MusicGen settings and generated audio.
        with gr.Column():
            prompt_duration = gr.Dropdown(
                label="Prompt Duration (seconds)",
                choices=list(range(1, 11)),
                value=5,
            )
            musicgen_model = gr.Dropdown(
                label="MusicGen Model",
                choices=_musicgen_choices,
                value="thepatch/vanya_ai_dnb_0.1 (small)",
            )
            # Pinned to a single iteration (minimum == maximum == 1).
            num_iterations = gr.Slider(label="this does nothing rn", minimum=1, maximum=1, step=1, value=1)
            generate_music_button = gr.Button("Generate Music")
            output_audio = gr.Audio(label="Generated Music", type="filepath")
            continue_button = gr.Button("Continue Generating Music")
            continue_output_audio = gr.Audio(label="Continued Music Output", type="filepath")

    # Connecting the components
    generate_midi_button.click(
        generate_midi,
        inputs=[seed, use_chords, chord_progression, bpm],
        outputs=[midi_audio],
    )
    generate_music_button.click(
        generate_music,
        inputs=[midi_audio, prompt_duration, musicgen_model, num_iterations, bpm],
        outputs=[output_audio],
    )
    continue_button.click(
        continue_music,
        inputs=[output_audio, prompt_duration, musicgen_model, num_iterations, bpm],
        outputs=continue_output_audio,
    )

iface.launch()