Jukebox
app.py
ADDED
@@ -0,0 +1,91 @@
import os

# "!nvidia-smi" is notebook shell syntax and is a syntax error in a plain .py file;
# call it through the OS instead to print GPU information.
os.system("nvidia-smi")

import jukebox
import torch as t
import librosa
from IPython.display import Audio
from jukebox.make_models import make_vqvae, make_prior, MODELS, make_model
from jukebox.hparams import Hyperparams, setup_hparams
from jukebox.sample import sample_single_window, _sample, \
    sample_partial_window, upsample
from jukebox.utils.dist_utils import setup_dist_from_mpi
from jukebox.utils.torch_utils import empty_cache

# Initialize the (single-process) distributed state and pick the CUDA device.
rank, local_rank, device = setup_dist_from_mpi()
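# --- Illustrative aside (not part of the original app.py) -------------------------------
# Sampling with the "5b_lyrics" prior needs a CUDA GPU with a lot of memory. A minimal
# sanity check using only standard torch calls, assuming `t` is the torch import above:
if not t.cuda.is_available():
    raise RuntimeError("Jukebox sampling requires a CUDA-capable GPU")
print("Using GPU:", t.cuda.get_device_name(0))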

model = "5b_lyrics"  # or "1b_lyrics"
hps = Hyperparams()
hps.sr = 44100                                      # output sample rate (Hz)
hps.n_samples = 3 if model == "5b_lyrics" else 8    # fewer samples for the larger model
hps.name = "samples"                                # output directory
chunk_size = 16 if model == "5b_lyrics" else 32
max_batch_size = 3 if model == "5b_lyrics" else 16
hps.levels = 3                                      # number of VQ-VAE levels
hps.hop_fraction = [.5, .5, .125]

# Load the VQ-VAE and the top-level (lyrics-conditioned) prior.
vqvae, *priors = MODELS[model]
vqvae = make_vqvae(setup_hparams(vqvae, dict(sample_length=1048576)), device)
top_prior = make_prior(setup_hparams(priors[-1], dict()), vqvae, device)

# Full length of the musical sample to generate. Songs in the 1-4 minute range work well,
# and generation time is proportional to sample length. The total length also affects how
# quickly the model progresses through the lyrics (it generates differently depending on
# whether it thinks it is at the beginning, middle, or end of the sample).
sample_length_in_seconds = 60

# Round down to a whole number of top-level tokens.
hps.sample_length = (int(sample_length_in_seconds * hps.sr) // top_prior.raw_to_tokens) * top_prior.raw_to_tokens
assert hps.sample_length >= top_prior.n_ctx * top_prior.raw_to_tokens, 'Please choose a longer sample length'
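# --- Illustrative aside (not part of the original app.py) -------------------------------
# Why the rounding and assertion above work out: assuming the released top-level Jukebox
# prior, which compresses raw audio by raw_to_tokens = 128 and has a context of
# n_ctx = 8192 tokens (an assumption, not read from this file), 60 s at 44.1 kHz rounds
# down to 2,645,888 samples and the minimum allowed length is 1,048,576 samples (~23.8 s).
def _illustrate_sample_length(seconds=60, sr=44100, raw_to_tokens=128, n_ctx=8192):
    rounded = (int(seconds * sr) // raw_to_tokens) * raw_to_tokens  # 2,645,888
    minimum = n_ctx * raw_to_tokens                                 # 1,048,576
    return rounded, minimum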
metas = [dict(artist="Zac Brown Band",
              genre="Country",
              total_length=hps.sample_length,
              offset=0,
              lyrics="""I met a traveller from an antique land,
Who said—“Two vast and trunkless legs of stone
Stand in the desert. . . . Near them, on the sand,
Half sunk a shattered visage lies, whose frown,
And wrinkled lip, and sneer of cold command,
Tell that its sculptor well those passions read
Which yet survive, stamped on these lifeless things,
The hand that mocked them, and the heart that fed;
And on the pedestal, these words appear:
My name is Ozymandias, King of Kings;
Look on my Works, ye Mighty, and despair!
Nothing beside remains. Round the decay
Of that colossal Wreck, boundless and bare
The lone and level sands stretch far away
""",
              ),
         ] * hps.n_samples

# Conditioning labels, one entry per level. Only the top level is filled here; the two
# upsampler entries are filled in later, once the upsamplers have been built.
labels = [None, None, top_prior.labeller.get_batch_labels(metas, 'cuda')]
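# --- Illustrative aside (not part of the original app.py) -------------------------------
# `metas` above repeats a single conditioning dict hps.n_samples times, so every sample
# shares the same artist, genre, and lyrics. A sketch of per-sample conditioning would use
# one dict per sample with the same fields (artist and genre names must exist in the
# model's vocabulary; the values below are hypothetical):
# metas = [
#     dict(artist="Zac Brown Band", genre="Country", total_length=hps.sample_length, offset=0, lyrics="..."),
#     dict(artist="Frank Sinatra",  genre="Jazz",    total_length=hps.sample_length, offset=0, lyrics="..."),
#     dict(artist="Queen",          genre="Rock",    total_length=hps.sample_length, offset=0, lyrics="..."),
# ]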
# Sampling hyperparameters for the three levels (two upsamplers, then the top level).
sampling_temperature = .98

lower_batch_size = 16
max_batch_size = 3 if model == "5b_lyrics" else 16
lower_level_chunk_size = 32
chunk_size = 16 if model == "5b_lyrics" else 32
sampling_kwargs = [dict(temp=.99, fp16=True, max_batch_size=lower_batch_size,
                        chunk_size=lower_level_chunk_size),
                   dict(temp=0.99, fp16=True, max_batch_size=lower_batch_size,
                        chunk_size=lower_level_chunk_size),
                   dict(temp=sampling_temperature, fp16=True,
                        max_batch_size=max_batch_size, chunk_size=chunk_size)]

# Top-level (ancestral) sampling: generate the level-2 codes with the top prior. This
# defines `zs`, which the upsampling step below consumes, following the standard Jukebox
# sampling flow (sample only level 2 here).
zs = [t.zeros(hps.n_samples, 0, dtype=t.long, device='cuda') for _ in range(len(priors))]
zs = _sample(zs, labels, sampling_kwargs, [None, None, top_prior], [2], hps)

Audio(f'{hps.name}/level_2/item_0.wav')

# Set this False if you are on a local machine that has enough memory (this allows you to
# do the lyrics alignment visualization during the upsampling stage). For a hosted runtime,
# the top_prior must be deleted to free GPU memory when using the 5b_lyrics model.
if True:
    del top_prior
    empty_cache()
    top_prior = None

upsamplers = [make_prior(setup_hparams(prior, dict()), vqvae, 'cpu') for prior in priors[:-1]]
labels[:2] = [prior.labeller.get_batch_labels(metas, 'cuda') for prior in upsamplers]

# Upsample the top-level codes through the two lower levels to produce the final audio.
zs = upsample(zs, labels, sampling_kwargs, [*upsamplers, top_prior], hps)

del upsamplers
empty_cache()

Audio(f'{hps.name}/level_0/item_0.wav')
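# --- Illustrative aside (not part of the original app.py) -------------------------------
# IPython.display.Audio only renders inside a notebook, so in a plain script it produces no
# visible output. One way to verify the final result is to load the generated file with
# librosa (imported above); the path assumes the default output layout used by this script.
final_wav = f"{hps.name}/level_0/item_0.wav"
if os.path.exists(final_wav):
    wav, wav_sr = librosa.load(final_wav, sr=None)
    print(final_wav, wav.shape, "sample rate:", wav_sr)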