zonic-generator

Sleeping

App Files Files Community

zonic-generator / app.py

charchits

new

45d94b5 8 months ago

raw

history blame contribute delete

3.75 kB

	from audiocraft.models import MusicGen
	import streamlit as st
	import os
	import torch
	import torchaudio
	import numpy as np
	import base64
	from dotenv import load_dotenv
	import google.generativeai as genai
	# Load variables from a .env file (if any) before the API key is looked up.
	load_dotenv()

	# Configure the Gemini client from the "API_KEY" environment variable and
	# create the model handle that main() uses to rewrite the user's description.
	genai.configure(api_key=os.environ.get("API_KEY"))
	llm = genai.GenerativeModel("gemini-pro")

	@st.cache_resource
	def load_model():
	    """Return the pretrained "facebook/musicgen-small" MusicGen model.

	    Decorated with ``st.cache_resource`` so the loaded model can be reused
	    instead of calling ``get_pretrained`` again on every invocation.
	    """
	    return MusicGen.get_pretrained("facebook/musicgen-small")

	def generate_music_tensors(description, duration: int):
	    """Generate audio for a single text description with the cached model.

	    Args:
	        description: Text prompt; sent to the model as a one-item batch.
	        duration: Forwarded as the ``duration`` generation parameter.

	    Returns:
	        Element 0 of the value returned by ``model.generate`` when called
	        with ``return_tokens=True`` (presumably the waveform tensor, with the
	        tokens in element 1 -- confirm against the audiocraft docs).
	    """
	    # Log the request to stdout.
	    print(f"Description: {description}")
	    print(f"Duration: {duration}")

	    musicgen = load_model()

	    generation_params = {
	        "use_sampling": True,
	        "top_k": 250,
	        "duration": duration,
	    }
	    musicgen.set_generation_params(**generation_params)

	    result = musicgen.generate(
	        descriptions=[description],
	        progress=True,
	        return_tokens=True,
	    )
	    return result[0]

	def save_audio(samples: torch.Tensor, sample_rate: int = 32000, save_path: str = "saved_audio/"):
	    """Write every clip in ``samples`` to ``<save_path>/audio_<idx>.wav``.

	    Args:
	        samples: A 2-D tensor (one clip) or a 3-D tensor (a batch of clips).
	            A 2-D input is wrapped into a batch of one, so it is written as
	            ``audio_0.wav``.
	        sample_rate: Sample rate handed to ``torchaudio.save``. Defaults to
	            32000, the value that used to be hard-coded here.
	        save_path: Directory that receives the files. Created if missing.
	            Defaults to ``"saved_audio/"``, the previously hard-coded path.

	    Returns:
	        The list of file paths written, in batch order.

	    Raises:
	        ValueError: If ``samples`` is neither 2-D nor 3-D.
	    """
	    # Explicit check rather than ``assert`` so the validation is not stripped
	    # when Python runs with -O. Done first so bad input has no side effects.
	    if samples.dim() not in (2, 3):
	        raise ValueError(
	            f"samples must be a 2-D or 3-D tensor, got {samples.dim()} dimension(s)"
	        )

	    samples = samples.detach().cpu()
	    if samples.dim() == 2:
	        samples = samples[None, ...]

	    # The writer does not create missing directories, so make sure the target
	    # exists (an empty save_path means "current directory").
	    if save_path:
	        os.makedirs(save_path, exist_ok=True)

	    written_paths = []
	    for idx, audio in enumerate(samples):
	        audio_path = os.path.join(save_path, f"audio_{idx}.wav")
	        torchaudio.save(audio_path, audio, sample_rate)
	        written_paths.append(audio_path)
	    return written_paths

	def download_music(bin_file, file_label="File"):
	    """Build an HTML download link that embeds ``bin_file`` as a data URI.

	    Args:
	        bin_file: Path of the file to embed; it is read in binary mode.
	        file_label: Text shown after "Download " in the link.

	    Returns:
	        An ``<a>`` tag string whose ``href`` is the base64-encoded file
	        content and whose ``download`` attribute is the file's base name.
	    """
	    with open(bin_file, 'rb') as source:
	        encoded = base64.b64encode(source.read()).decode()

	    file_name = os.path.basename(bin_file)
	    return (
	        f'<a href="data:application/octet-stream;base64,{encoded}" '
	        f'download="{file_name}">Download {file_label}</a>'
	    )

	# Page-level settings: browser-tab title and icon, and the wide layout.
	st.set_page_config(
	    page_title="ZONIC: AI Music Generator",
	    page_icon=":musical_note:",
	    layout="wide",
	)

	def main():
	    """Render the ZONIC page and generate music once a description is entered.

	    Flow:
	        1. Draw the header, banner image, description box and duration slider.
	        2. If the description is empty or whitespace-only, stop.
	        3. Ask the Gemini model (``llm``) to rewrite the description into a
	           more detailed text-to-music prompt.
	        4. Generate audio from that prompt, save it, then show an audio
	           player and a download link for ``saved_audio/audio_0.wav``.
	    """
	    # Only the first of the three columns is used, for the title.
	    title_col, _, _ = st.columns(3)
	    title_col.subheader("ZONIC: AI Music Generator")
	    st.image('picture-of-abstract-music.jpg', caption='Modern and Abstract Music Generation', use_column_width=True)

	    text_area = st.text_area("Enter your description...")
	    time_slider = st.slider("Select time duration (in seconds)", 2, 20, 5)

	    # Stop before the LLM request when there is nothing to enhance; previously
	    # the request was issued on every run, even with an empty description.
	    # (The slider's minimum is 2, so its value never needs a truthiness check.)
	    if not text_area or not text_area.strip():
	        return

	    context = f"""Given the basic description of a prompt for a text-to-music generator below, enhance that prompt by using specific, direct, accurate and relevant vocabulary. This enhanced prompt must clearly assert and describe the kind of music user wants to generate, with the help of appropriate musical terminology or taxonomy. Craft a creative prompt that clearly explains the text-to-music model what music the user desires. DO NOT respond with anything other than the output prompt. You can be as creative as you like with the descriptions, but DO NOT make up details that the original prompt did not ask for. Also, make sure the description is not too lengthy, keep it concise. Your prompt must explain the flow of the music from start through the middle towards the finish, explicitly mentioning the way instruments are played and what they should sound like.

	ORIGINAL PROMPT : {text_area}
	YOUR OUTPUT PROMPT :
	"""
	    prompt = llm.generate_content(context).text

	    st.json(
	        {
	            "Description": prompt,
	            "Duration": time_slider,
	        }
	    )

	    st.subheader("Generated Music")

	    music_tensors = generate_music_tensors(prompt, time_slider)
	    print(f"Music Tensors: {music_tensors}")

	    save_audio(music_tensors)

	    # save_audio names its outputs audio_<idx>.wav; a single prompt yields idx 0.
	    audio_filepath = "saved_audio/audio_0.wav"
	    # Context manager so the file handle is closed after reading.
	    with open(audio_filepath, 'rb') as audio_file:
	        audio_bytes = audio_file.read()

	    st.audio(audio_bytes)
	    st.markdown(download_music(audio_filepath, 'Audio'), unsafe_allow_html=True)

	# Run the app only when this file is executed as the main script.
	if __name__ == "__main__":
	    main()