Add Background Image to make music easily shareable on FB as video
- app.py  +7 -4
- app_batched.py  +3 -1
- assets/background.png  +0 -0
- audiocraft/utils/extend.py  +1 -1
app.py
CHANGED
@@ -25,8 +25,9 @@ def load_model(version):
     return MusicGen.get_pretrained(version)


-def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef):
+def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background):
     global MODEL
+    output_segments = None
     topk = int(topk)
     if MODEL is None or MODEL.name != model:
         MODEL = load_model(model)
@@ -77,7 +78,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef):
         audio_write(
             file.name, output, MODEL.sample_rate, strategy="loudness",
             loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
-        waveform_video = gr.make_waveform(file.name)
+        waveform_video = gr.make_waveform(file.name, bg_image=background, bar_count=40)
     return waveform_video


@@ -105,6 +106,8 @@ def ui(**kwargs):
                 melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
                 with gr.Row():
                     submit = gr.Button("Submit")
+                with gr.Row():
+                    background = gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768, 512), type="filepath", interactive=True)
                 with gr.Row():
                     model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
                 with gr.Row():
@@ -117,7 +120,7 @@ def ui(**kwargs):
                     cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
             with gr.Column():
                 output = gr.Video(label="Generated Music")
-        submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef], outputs=[output])
+        submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background], outputs=[output])
         gr.Examples(
             fn=predict,
             examples=[
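Note: `bg_image` and `bar_count` are documented keyword arguments of Gradio's built-in `gr.make_waveform` helper, which renders an audio file into an MP4 of animated bars over an optional background image. A minimal standalone sketch of the call used above (Gradio 3.x; the file paths here are placeholders, not files from this commit):

import gradio as gr

# Render an audio file as a shareable waveform video.
video_path = gr.make_waveform(
    "sample.wav",               # audio to visualize (placeholder path)
    bg_image="background.png",  # still image drawn behind the bars (placeholder path)
    bar_count=40,               # number of waveform bars, as in the diff above
)
print(video_path)  # filepath of the generated .mp4

Returning this path from `predict` is what lets the `gr.Video` output component display the result.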
app_batched.py
CHANGED
@@ -40,6 +40,8 @@ def predict(texts, melodies):
             processed_melodies.append(None)
         else:
             sr, melody = melody[0], torch.from_numpy(melody[1]).to(MODEL.device).float().t()
+            duration = min(duration, melody.shape[-1] / sr)
+            MODEL.set_generation_params(duration=duration)
             if melody.dim() == 1:
                 melody = melody[None]
             melody = melody[..., :int(sr * duration)]
@@ -50,7 +52,7 @@ def predict(texts, melodies):
         descriptions=texts,
         melody_wavs=processed_melodies,
         melody_sample_rate=target_sr,
-        progress=False
+        progress=True
     )

     outputs = outputs.detach().cpu().float()
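The clamp added in the first hunk caps the generation length at the uploaded melody's real duration: a clip of N samples at sample rate `sr` lasts N / sr seconds, so the later slice `melody[..., :int(sr * duration)]` can never reach past the end of the clip. A small self-contained illustration (all values made up):

import torch

sr = 32000                       # sample rate in Hz (illustrative)
melody = torch.zeros(1, sr * 8)  # a stand-in 8-second mono clip
requested = 12.0                 # seconds the user asked for

duration = min(requested, melody.shape[-1] / sr)
print(duration)  # 8.0 -- capped at the conditioning clip's length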
assets/background.png
ADDED
audiocraft/utils/extend.py
CHANGED
@@ -45,7 +45,7 @@ def generate_music_segments(text, melody, MODEL, duration:int=10, segment_duration

     # Iterate over the segments to create a list of Melody tensors
     for segment_idx in range(total_segments):
-        print(f"segment {segment_idx} of {total_segments} \r")
+        print(f"segment {segment_idx + 1} of {total_segments} \r")
         sr, verse = melody_segments[segment_idx][0], torch.from_numpy(melody_segments[segment_idx][1]).to(MODEL.device).float().t().unsqueeze(0)

         print(f"shape:{verse.shape} dim:{verse.dim()}")
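One caveat on the changed line: `print` appends its own newline, so the trailing `\r` inside the f-string never actually rewrites the previous line. If true in-place progress output is intended, the usual idiom (a sketch, not part of this commit) is:

total_segments = 5  # illustrative value
for segment_idx in range(total_segments):
    print(f"segment {segment_idx + 1} of {total_segments}", end="\r", flush=True)
print()  # advance past the progress line once the loop finishes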