Spaces: Running on Zero

Commit · 694aa5b
1 Parent(s): 59f0768

decapitalizing

app.py CHANGED
@@ -339,86 +339,87 @@ def calculate_optimal_bars(bpm):
 
 # ========== GRADIO INTERFACE ==========
 
-with gr.Blocks(title="🎵 Stable Audio Loop Generator") as iface:
-    gr.Markdown("#
-    gr.Markdown("**
+with gr.Blocks(title="stable-melodyflow") as iface:
+    gr.Markdown("# stable-melodyflow (aka jerry and terry)")
+    gr.Markdown("**generate synchronized drum and instrument loops with stable-audio-open-small (jerry), then transform with melodyflow (terry)!**")
 
     # ========== MODELS & PROJECT INFO ==========
-    with gr.Accordion("
+    with gr.Accordion(" some info about these models", open=False):
 
         with gr.Accordion("π stable-audio-open-small", open=False):
             gr.Markdown("""
             **stable-audio-open-small** is an incredibly fast model from the zachs and friends at Stability AI. It's capable of generating 12 seconds of audio in under a second, which gives rise to a lot of very interesting kinds of UX.
 
-            **
+            **note about generation speed in this zerogpu space:** you'll notice generation times are a little slower here than if you were to use the model on a local gpu. that's just a result of the way zerogpu spaces work, i think... let me know if there's a way to keep the model loaded in a zerogpu space!
 
-            **
-            - 🤗 [
-
+            **links:**
+            - 🤗 [model on HuggingFace](https://huggingface.co/stabilityai/stable-audio-open-small)
+            there's a docker container at this repo that can be spun up as a standalone api specifically for stable-audio-open-small:
+            - [stable-audio-api](https://github.com/betweentwomidnights/stable-audio-api)
             """)
 
-        with gr.Accordion("ποΈ
+        with gr.Accordion("ποΈ melodyflow", open=False):
             gr.Markdown("""
-            **MelodyFlow** is a model by
+            **MelodyFlow** is a model by meta that can use regularized latent inversion to do transformations of input audio.
 
-            It's not officially a part of the audiocraft repo yet, but we use it as a docker container in the backend for gary4live.
+            It's not officially a part of the audiocraft repo yet, but we use it as a docker container in the backend for gary4live. i really enjoy turning my guitar riffs into an orchestra.
 
-            **
-            - 🤗 [MelodyFlow Space](https://huggingface.co/spaces/Facebook/MelodyFlow)
-
+            **links:**
+            - 🤗 [Official MelodyFlow Space](https://huggingface.co/spaces/Facebook/MelodyFlow)
+
+            - [our melodyflow api](https://github.com/betweentwomidnights/melodyflow)
             """)
 
-        with gr.Accordion("
+        with gr.Accordion("gary4live Project", open=False):
             gr.Markdown("""
-            **gary4live** is a free/open source project that uses these models, along with
+            **gary4live** is a free/open source project that uses these models, along with musicGen, inside of ableton live to iterate on your projects with you. i run a backend myself so that we can all experiment with it, but you can also spin the backend up locally using docker-compose with our repo.
 
-            **
-            -
-            -
+            **project links:**
+            - [frontend repo](https://github.com/betweentwomidnights/gary4live)
+            - [backend repo](https://github.com/betweentwomidnights/gary-backend-combined)
 
-            **
-            -
+            **installers:**
+            - [p.c. & mac installers on gumroad](https://thepatch.gumroad.com/l/gary4live)
             """)
 
-        with gr.Accordion("
+        with gr.Accordion("how this works", open=False):
             gr.Markdown("""
-            **
-            1. **
-            2. **
-            3. **
-            4. **
-            5. **
-
-            **
-            -
-            -
-            -
-            - MelodyFlow integration for advanced style transfer
+            **workflow:**
+            1. **set global bpm and bars** - affects both drum and instrument generation
+            2. **generate drum loop** - creates BPM-aware percussion with negative prompting to attempt to get rid of instruments
+            3. **generate instrument loop** - creates melodic/harmonic content with negative prompting to attempt to get rid of drums
+            4. **combine loops** - layer them together with repetitions (up to 30s)
+            5. **transform** - use melodyflow to stylistically transform the combined result
+
+            **features:**
+            - bpm-aware generation ensures perfect sync between loops (most of the time lol)
+            - negative prompting separates drums from instruments (most of the time)
+            - smart bar calculation optimizes loop length for the BPM
             """)
 
     # ========== GLOBAL CONTROLS ==========
-    gr.Markdown("## ποΈ
+    gr.Markdown("## ποΈ global settings")
 
     with gr.Row():
         global_bpm = gr.Dropdown(
-            label="
+            label="global bpm",
             choices=[90, 100, 110, 120, 130, 140, 150],
             value=120,
-            info="
+            info="bpm applied to both drum and instrument generation. keep this the same for the combine step to work correctly"
         )
 
         global_bars = gr.Dropdown(
-            label="
-            choices=[1, 2, 4
+            label="loop length (bars)",
+            choices=[1, 2, 4],
             value=4,
-            info="
+            info="number of bars for each loop. keep this the same for both pieces of audio"
         )
 
         base_prompt = gr.Textbox(
-            label="
-            value="
-            placeholder="e.g., 'techno', '
-            info="
+            label="base prompt",
+            value="lofi hiphop with pianos",
+            placeholder="e.g., 'aggressive techno', 'lofi hiphop', 'chillwave', 'liquid drum and bass'",
+            info="prompt applied to both loops. make it more drum/instrument specific for best results"
         )
 
     # Auto-suggest optimal bars based on BPM
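
note: the hunk header for this section references calculate_optimal_bars(bpm), which the diff itself doesn't show. here's a hedged sketch of the bar math such a helper plausibly implements, assuming 4/4 time and the ~12 second generation window mentioned above; the "largest loop that fits" selection rule is a guess, not the app's code.

```python
# a minimal sketch of the bpm/bars math this UI relies on; the real
# calculate_optimal_bars() isn't shown in this diff, so the selection
# rule below is an assumption.

def loop_seconds(bpm: float, bars: int, beats_per_bar: int = 4) -> float:
    """Duration of `bars` bars at `bpm` in 4/4 time."""
    return bars * beats_per_bar * 60.0 / bpm

def suggest_bars(bpm: float, max_seconds: float = 12.0, choices=(1, 2, 4)) -> int:
    """Pick the largest bar count whose loop still fits the model's window."""
    fitting = [b for b in choices if loop_seconds(bpm, b) <= max_seconds]
    return max(fitting) if fitting else min(choices)

# e.g. at 120 bpm a 4-bar loop is 8.0 s; at 90 bpm it's ~10.7 s, still under 12 s
print(suggest_bars(120), suggest_bars(90))
```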
@@ -429,64 +430,64 @@ with gr.Blocks(title="🎵 Stable Audio Loop Generator") as iface:
     global_bpm.change(update_suggested_bars, inputs=[global_bpm], outputs=[global_bars])
 
     # ========== LOOP GENERATION ==========
-    gr.Markdown("##
+    gr.Markdown("## step one: generate individual loops")
 
     with gr.Row():
         with gr.Column():
-            gr.Markdown("###
-            generate_drums_btn = gr.Button("
-            drums_audio = gr.Audio(label="
-            drums_status = gr.Textbox(label="
+            gr.Markdown("### drums")
+            generate_drums_btn = gr.Button("generate drums", variant="primary", size="lg")
+            drums_audio = gr.Audio(label="drum loop", type="filepath")
+            drums_status = gr.Textbox(label="status", value="ready to generate")
 
         with gr.Column():
-            gr.Markdown("###
-            generate_instruments_btn = gr.Button("
-            instruments_audio = gr.Audio(label="
-            instruments_status = gr.Textbox(label="
+            gr.Markdown("### instruments")
+            generate_instruments_btn = gr.Button("generate instruments", variant="secondary", size="lg")
+            instruments_audio = gr.Audio(label="instrument loop", type="filepath")
+            instruments_status = gr.Textbox(label="status", value="ready to generate")
 
     # Seed controls
     with gr.Row():
-        drums_seed = gr.Number(label="
-        instruments_seed = gr.Number(label="
+        drums_seed = gr.Number(label="drums seed", value=-1, info="-1 for random")
+        instruments_seed = gr.Number(label="instruments seed", value=-1, info="-1 for random")
 
     # ========== COMBINATION ==========
-    gr.Markdown("##
+    gr.Markdown("## step two: combine loops")
 
     with gr.Row():
         num_repeats = gr.Slider(
-            label="
+            label="number of repetitions",
             minimum=1,
             maximum=5,
             step=1,
             value=2,
-            info="
+            info="how many times to repeat each loop (creates longer audio). aim for 30 seconds max"
         )
-        combine_btn = gr.Button("
+        combine_btn = gr.Button("combine", variant="primary", size="lg")
 
-    combined_audio = gr.Audio(label="
-    combine_status = gr.Textbox(label="
+    combined_audio = gr.Audio(label="combined loops", type="filepath")
+    combine_status = gr.Textbox(label="status", value="generate loops first")
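
note: the combine handler lives outside this hunk, but the layering described above amounts to: tile each loop num_repeats times, trim to a common length, mix, and normalize. a mono-audio sketch with illustrative names, not the app's actual function:

```python
# hedged sketch of the combine step (the real handler isn't in this diff).
# assumes mono float arrays at a shared sample rate.
import numpy as np

def combine_loops(drums: np.ndarray, instruments: np.ndarray,
                  num_repeats: int = 2, max_seconds: float = 30.0,
                  sample_rate: int = 44100) -> np.ndarray:
    drums_full = np.tile(drums, num_repeats)        # repeat each loop
    instr_full = np.tile(instruments, num_repeats)
    # trim to a common length, capped at the ~30 s the UI aims for
    n = min(len(drums_full), len(instr_full), int(max_seconds * sample_rate))
    mix = drums_full[:n] + instr_full[:n]           # layer the two loops
    peak = np.max(np.abs(mix))
    return mix / peak if peak > 0 else mix          # normalize to avoid clipping
```

this only stays in sync because both loops were generated at the same bpm and bar count, which is why the global settings above warn you to keep them fixed between the two generations.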
 
     # ========== MELODYFLOW TRANSFORMATION ==========
-    gr.Markdown("##
+    gr.Markdown("## step three: transform with melodyflow")
 
     with gr.Row():
         with gr.Column():
             transform_prompt = gr.Textbox(
-                label="
+                label="transformation prompt",
                 value="aggressive industrial techno with distorted sounds",
-                placeholder="
+                placeholder="describe the style of transformation",
                 lines=2
             )
 
         with gr.Column():
             transform_solver = gr.Dropdown(
-                label="
+                label="solver",
                 choices=["euler", "midpoint"],
                 value="euler",
                 info="EULER: faster (25 steps), MIDPOINT: slower (64 steps)"
             )
             transform_flowstep = gr.Slider(
-                label="
+                label="transform intensity",
                 minimum=0.0,
                 maximum=0.15,
                 step=0.01,
@@ -494,9 +495,9 @@ with gr.Blocks(title="🎵 Stable Audio Loop Generator") as iface:
                 info="Lower = more dramatic transformation"
             )
 
-    transform_btn = gr.Button("
-    transformed_audio = gr.Audio(label="
-    transform_status = gr.Textbox(label="
+    transform_btn = gr.Button("transform audio", variant="secondary", size="lg")
+    transformed_audio = gr.Audio(label="transformed audio", type="filepath")
+    transform_status = gr.Textbox(label="status", value="combine audio first")
 
     # ========== EVENT HANDLERS ==========
 
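
note on the intensity slider: melodyflow's latent inversion walks the audio latent back along the flow from t=1 toward t=flowstep, and everything past that point is re-generated under the new prompt, which is why lower values transform more. a toy illustration with a linear flow standing in for the real learned velocity field (conceptual only, not melodyflow's code):

```python
# toy illustration of why lower flowstep = more dramatic transformation.
# the linear path x_t = (1 - t) * noise + t * latent stands in for the
# model's learned flow; names here are illustrative.
import numpy as np

rng = np.random.default_rng(0)
source_latent = rng.normal(size=8)   # stands in for the input audio's latent
noise = rng.normal(size=8)           # the flow's t=0 endpoint

def invert(latent, flowstep):
    # walk back along the linear flow to time t = flowstep
    return (1 - flowstep) * noise + flowstep * latent

for flowstep in (0.15, 0.05, 0.0):
    x = invert(source_latent, flowstep)
    keep = np.corrcoef(x, source_latent)[0, 1]
    print(f"flowstep={flowstep:.2f} -> correlation with source {keep:+.2f}")
```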
@@ -528,19 +529,19 @@ with gr.Blocks(title="🎵 Stable Audio Loop Generator") as iface:
         outputs=[transformed_audio, transform_status]
     )
 
-    # ========== EXAMPLES ==========
-    gr.Markdown("## 🎯 Example Workflows")
+    # # ========== EXAMPLES ==========
+    # gr.Markdown("## 🎯 Example Workflows")
 
-    examples = gr.Examples(
-        examples=[
-            ["techno", 128, 4, "aggressive industrial techno"],
-            ["jazz", 110, 2, "smooth lo-fi jazz with vinyl crackle"],
-            ["ambient", 90, 8, "ethereal ambient soundscape"],
-            ["hip-hop", 100, 4, "classic boom bap hip-hop"],
-            ["drum and bass", 140, 4, "liquid drum and bass"],
-        ],
-        inputs=[base_prompt, global_bpm, global_bars, transform_prompt],
-    )
+    # examples = gr.Examples(
+    #     examples=[
+    #         ["techno", 128, 4, "aggressive industrial techno"],
+    #         ["jazz", 110, 2, "smooth lo-fi jazz with vinyl crackle"],
+    #         ["ambient", 90, 8, "ethereal ambient soundscape"],
+    #         ["hip-hop", 100, 4, "classic boom bap hip-hop"],
+    #         ["drum and bass", 140, 4, "liquid drum and bass"],
+    #     ],
+    #     inputs=[base_prompt, global_bpm, global_bars, transform_prompt],
+    # )
 
 if __name__ == "__main__":
     iface.launch()