Add Interrupt Button
Files changed:
- app.py (+20 -3)
- audiocraft/utils/extend.py (+5 -3)
app.py
CHANGED
@@ -11,6 +11,8 @@ import argparse
 import torch
 import gradio as gr
 import os
+import time
+import warnings
 from audiocraft.models import MusicGen
 from audiocraft.data.audio import audio_write
 from audiocraft.utils.extend import generate_music_segments, add_settings_to_image
@@ -20,6 +22,19 @@ import random
 MODEL = None
 IS_SHARED_SPACE = "musicgen/MusicGen" in os.environ.get('SPACE_ID', '')

+def interrupt():
+    global INTERRUPTING
+    INTERRUPTING = True
+
+
+def make_waveform(*args, **kwargs):
+    # Further remove some warnings.
+    be = time.time()
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        out = gr.make_waveform(*args, **kwargs)
+        print("Make a video took", time.time() - be)
+        return out

 def load_model(version):
     print("Loading model", version)
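The hunk above only sets the INTERRUPTING flag; the commit never shows the flag being initialized or read. In the rkfg long branch credited later in this diff, the flag is consumed by a custom progress callback registered on the model, which raises to abort generation mid-run. A minimal sketch along those lines, assuming the same wiring (MusicGen.set_custom_progress_callback exists in audiocraft; whether this Space registers it is not shown here, and the module-level default below is part of the assumption):

INTERRUPTING = False

def _progress(generated, to_generate):
    # Hypothetical consumer of the flag: called by MusicGen as tokens
    # are generated, once a model is loaded into MODEL.
    print(f"{generated: 6d} / {to_generate: 6d}", end="\r")
    if INTERRUPTING:
        # Raising aborts the generation loop and surfaces the message
        # in the Gradio UI.
        raise gr.Error("Interrupted.")

MODEL.set_custom_progress_callback(_progress)

The Interrupt button added further down is wired with queue=False, so the click is processed immediately instead of waiting behind the generation job already running.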
@@ -102,7 +117,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
             output = output_segments[0]
             for i in range(1, len(output_segments)):
                 overlap_samples = overlap * MODEL.sample_rate
-                output = torch.cat([output[:, :, :-overlap_samples], output_segments[i][:, :, overlap_samples:]], dim=
+                output = torch.cat([output[:, :, :-overlap_samples], output_segments[i][:, :, overlap_samples:]], dim=dimension)
             output = output.detach().cpu().float()[0]
         except Exception as e:
             print(f"Error combining segments: {e}. Using the first segment only.")
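To make the join concrete, take the model's 32 kHz output rate and the slider's default overlap of 5 seconds (illustrative values; the diff itself fixes neither): overlap_samples = 160000, and each concatenation trims that many samples from the tail of the running output and the head of the incoming segment before joining along dim=dimension, where dim=2 is the time axis of a [batch, channels, samples] tensor. A shape-only check:

import torch

sample_rate = 32000                       # MusicGen decodes at 32 kHz
overlap = 5                               # seconds, the slider default
overlap_samples = overlap * sample_rate   # 160000

# two dummy 30-second segments shaped [batch, channels, samples]
a = torch.zeros(1, 1, 30 * sample_rate)
b = torch.zeros(1, 1, 30 * sample_rate)

joined = torch.cat([a[:, :, :-overlap_samples], b[:, :, overlap_samples:]], dim=2)
print(joined.shape[-1] / sample_rate)     # 50.0: 60 s minus 5 s from each side of the joint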
@@ -116,7 +131,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
         audio_write(
             file.name, output, MODEL.sample_rate, strategy="loudness",
             loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
-        waveform_video =
+        waveform_video = make_waveform(file.name,bg_image=background, bar_count=40)
     return waveform_video, seed


@@ -144,6 +159,8 @@ def ui(**kwargs):
                     melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
                 with gr.Row():
                     submit = gr.Button("Submit")
+                    # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
+                    _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
                 with gr.Row():
                     background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
                     include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
@@ -156,7 +173,7 @@ def ui(**kwargs):
                 with gr.Row():
                     duration = gr.Slider(minimum=1, maximum=1000, value=10, label="Duration", interactive=True)
                     overlap = gr.Slider(minimum=1, maximum=29, value=5, step=1, label="Overlap", interactive=True)
-                    dimension = gr.Slider(minimum=-2, maximum=
+                    dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
                 with gr.Row():
                     topk = gr.Number(label="Top-k", value=250, interactive=True)
                     topp = gr.Number(label="Top-p", value=0, interactive=True)
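One note on the new Dimension slider: its info text leaves -2..0 as an open question, but for the 3-D tensors being concatenated, negative dims are just axes counted from the end, so dim=-1 lengthens exactly like dim=2, dim=-2 stacks channels exactly like dim=1, and dim=0 stacks along the batch axis:

import torch

x = torch.zeros(1, 1, 8)                 # [batch, channels, samples]
print(torch.cat([x, x], dim=2).shape)    # torch.Size([1, 1, 16]) lengthen
print(torch.cat([x, x], dim=-1).shape)   # torch.Size([1, 1, 16]) same axis as dim=2
print(torch.cat([x, x], dim=1).shape)    # torch.Size([1, 2, 8])  stack tracks
print(torch.cat([x, x], dim=0).shape)    # torch.Size([2, 1, 8])  stack batch items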
audiocraft/utils/extend.py
CHANGED
@@ -30,7 +30,7 @@ def separate_audio_segments(audio, segment_duration=30, overlap=1):
     if total_samples > 0:
         segment = audio_data[-segment_samples:]
         segments.append((sr, segment))
-
+    print(f"separate_audio_segments: {len(segments)} segments")
     return segments

 def generate_music_segments(text, melody, MODEL, seed, duration:int=10, overlap:int=1, segment_duration:int=30):
@@ -43,9 +43,11 @@ def generate_music_segments(text, melody, MODEL, seed, duration:int=10, overlap:

     # Calculate the total number of segments
     total_segments = max(math.ceil(duration / segment_duration),1)
-
+    # account for overlap
+    duration = duration + (max((total_segments - 1),0) * overlap)
+    total_segments = max(math.ceil(duration / segment_duration),1)
     #calc excess duration
-    excess_duration = total_segments * segment_duration - duration
+    excess_duration = segment_duration - (total_segments * segment_duration - duration)
     print(f"total Segments to Generate: {total_segments} for {duration} seconds. Each segment is {segment_duration} seconds. Excess {excess_duration}")

     # If melody_segments is shorter than total_segments, repeat the segments until the total_segments is reached