Spaces:

SethyYann98
/

Jukebox

Runtime error

App Files Files Community

Jukebox / app.py

SethyYann98

Jukebox

b97d304 verified 5 months ago

raw

history blame contribute delete

4.18 kB

	# Print GPU status at startup. The original `!nvidia-smi` is IPython shell
	# syntax, which is a SyntaxError in a plain Python file, so the tool is
	# invoked through subprocess instead.
	import shutil
	import subprocess

	if shutil.which("nvidia-smi") is not None:
	    # check=False: a failing diagnostic must not abort the script.
	    subprocess.run(["nvidia-smi"], check=False)


	import jukebox
	import torch as t
	import librosa
	import os
	from IPython.display import Audio
	from jukebox.make_models import make_vqvae, make_prior, MODELS, make_model
	from jukebox.hparams import Hyperparams, setup_hparams
	from jukebox.sample import sample_single_window, _sample, \
	sample_partial_window, upsample
	from jukebox.utils.dist_utils import setup_dist_from_mpi
	from jukebox.utils.torch_utils import empty_cache
	# setup_dist_from_mpi() yields (rank, local_rank, device); `device` is handed
	# to the model builders below.
	rank, local_rank, device = setup_dist_from_mpi()


	# Model choice: "5b_lyrics" or "1b_lyrics".
	model = "5b_lyrics"
	# Sampling hyperparameters. The size-related values depend on the model:
	# "5b_lyrics" gets the smaller settings, anything else the larger ones.
	hps = Hyperparams()
	hps.sr = 44100
	hps.n_samples = {"5b_lyrics": 3}.get(model, 8)
	hps.name = "samples"
	chunk_size = {"5b_lyrics": 16}.get(model, 32)
	max_batch_size = {"5b_lyrics": 3}.get(model, 16)
	hps.levels = 3
	hps.hop_fraction = [0.5, 0.5, 0.125]

	# MODELS[model] unpacks into the VQ-VAE entry followed by the prior entries.
	vqvae, *priors = MODELS[model]
	vqvae = make_vqvae(setup_hparams(vqvae, {"sample_length": 1048576}), device)
	# The last prior entry is the one built as the top-level prior.
	top_prior = make_prior(setup_hparams(priors[-1], {}), vqvae, device)


	# Full length of the musical sample to generate, in seconds. Songs in the
	# 1 to 4 minute range work well, with generation time proportional to the
	# sample length. The total length also affects how quickly the model
	# progresses through the lyrics (the model generates differently depending
	# on whether it thinks it is at the beginning, middle, or end of the sample).
	sample_length_in_seconds = 60

	# Convert seconds to raw audio samples, rounded down to a whole multiple of
	# top_prior.raw_to_tokens. (Both `*` operators had been lost from this
	# expression, which made the line a SyntaxError.)
	hps.sample_length = (
	    int(sample_length_in_seconds * hps.sr) // top_prior.raw_to_tokens
	) * top_prior.raw_to_tokens
	# The sample must cover at least n_ctx tokens of the top prior; if this
	# fires, increase sample_length_in_seconds.
	assert hps.sample_length >= top_prior.n_ctx*top_prior.raw_to_tokens, 'Please choose a larger sampling rate'
	# Conditioning metadata for generation: artist, genre, total length (in raw
	# audio samples, taken from hps.sample_length), start offset and lyrics.
	# `[...] * hps.n_samples` repeats the SAME dict object once per sample, so
	# every sample is conditioned identically (and mutating one entry would
	# change all of them).
	metas = [dict(artist = "Zac Brown Band",
	genre = "Country",
	total_length = hps.sample_length,
	offset = 0,
	lyrics = """I met a traveller from an antique land,
	Who said—“Two vast and trunkless legs of stone
	Stand in the desert. . . . Near them, on the sand,
	Half sunk a shattered visage lies, whose frown,
	And wrinkled lip, and sneer of cold command,
	Tell that its sculptor well those passions read
	Which yet survive, stamped on these lifeless things,
	The hand that mocked them, and the heart that fed;
	And on the pedestal, these words appear:
	My name is Ozymandias, King of Kings;
	Look on my Works, ye Mighty, and despair!
	Nothing beside remains. Round the decay
	Of that colossal Wreck, boundless and bare
	The lone and level sands stretch far away
	""",
	),
	] * hps.n_samples
	# One label slot per level. Only the last slot (the top prior's) is filled
	# here; the first two stay None until the upsamplers are created.
	# NOTE(review): 'cuda' is hard-coded rather than using `device` - confirm.
	labels = [None, None, top_prior.labeller.get_batch_labels(metas, 'cuda')]
	# Sampling settings. These are defined first because both the top-level
	# sampling call and the upsampling call below read `sampling_kwargs`
	# (previously it was only assigned after its first use).
	sampling_temperature = .98

	lower_batch_size = 16
	max_batch_size = 3 if model == "5b_lyrics" else 16
	lower_level_chunk_size = 32
	chunk_size = 16 if model == "5b_lyrics" else 32
	# One kwargs dict per level: the first two share the lower-level settings,
	# the last one uses the top-level temperature, batch size and chunk size.
	sampling_kwargs = [
	    dict(temp=.99, fp16=True, max_batch_size=lower_batch_size,
	         chunk_size=lower_level_chunk_size),
	    dict(temp=0.99, fp16=True, max_batch_size=lower_batch_size,
	         chunk_size=lower_level_chunk_size),
	    dict(temp=sampling_temperature, fp16=True,
	         max_batch_size=max_batch_size, chunk_size=chunk_size),
	]

	# Top-level sampling: start from empty code sequences (one per prior) and
	# sample level 2 with the top prior. This step produces `zs`, which the
	# upsampling call needs and which was never defined before.
	# NOTE(review): restored using `_sample` and `t`, which the file imports
	# but otherwise never uses - confirm the arguments against jukebox.sample.
	zs = [t.zeros(hps.n_samples, 0, dtype=t.long, device='cuda') for _ in range(len(priors))]
	zs = _sample(zs, labels, sampling_kwargs, [None, None, top_prior], [2], hps)

	Audio(f'{hps.name}/level_2/item_0.wav')

	# Set this False if you are on a local machine that has enough memory (this allows you to do the
	# lyrics alignment visualization during the upsampling stage). For a hosted runtime,
	# we'll need to go ahead and delete the top_prior if you are using the 5b_lyrics model.
	if True:
	    del top_prior
	    empty_cache()
	    top_prior = None
	# Build the lower-level priors on the CPU and fill in their label slots.
	upsamplers = [make_prior(setup_hparams(prior, dict()), vqvae, 'cpu') for prior in priors[:-1]]
	labels[:2] = [prior.labeller.get_batch_labels(metas, 'cuda') for prior in upsamplers]

	# Upsample the sampled codes through the lower levels.
	zs = upsample(zs, labels, sampling_kwargs, [*upsamplers, top_prior], hps)

	# Drop the upsamplers once upsampling has finished.
	del upsamplers
	empty_cache()
	Audio(f'{hps.name}/level_0/item_0.wav')