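"""Gradio web demo for InspireMusic.

Two music generation tasks are supported: Text-to-Music (generate audio from a
text prompt) and Music Continuation (extend a short uploaded audio prompt,
optionally guided by text). Generated audio is written under exp/inspiremusic.
"""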
import os
import sys
import gradio as gr
from inspiremusic.cli.inference import InspireMusicUnified, set_env_variables
import torchaudio
import datetime
import hashlib
def generate_filename():
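    """Return a unique filename stem: the SHA-256 hex digest of the current Unix timestamp."""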
now = datetime.datetime.now()
seconds_since_epoch = int(now.timestamp())
# Convert seconds to string
seconds_str = str(seconds_since_epoch)
# Hash the string using SHA-256
hash_object = hashlib.sha256(seconds_str.encode())
hash_string = hash_object.hexdigest()
return hash_string
def get_args(
        task, text="", audio=None, model_name="InspireMusic-Base",
        chorus="intro", fast=False, fade_out=True,
        output_sample_rate=48000, max_generate_audio_seconds=30.0,
        time_start=0.0, time_end=30.0, trim=False):
# This function constructs the arguments required for InspireMusic
args = {
"task" : task,
"text" : text,
"audio_prompt" : audio,
"model_name" : model_name,
"chorus" : chorus,
"fast" : fast,
"fade_out" : fade_out,
"trim" : trim,
"output_sample_rate" : output_sample_rate,
"min_generate_audio_seconds": 10.0,
"max_generate_audio_seconds": max_generate_audio_seconds,
"model_dir" : os.path.join("pretrained_models",
model_name),
"result_dir" : "exp/inspiremusic",
"output_fn" : generate_filename(),
"format" : "wav",
"time_start" : time_start,
"time_end": time_end,
"fade_out_duration": 1.0,
}
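    # If no start time is given, default the window to [0, max_generate_audio_seconds].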
if args["time_start"] is None:
args["time_start"] = 0.0
args["time_end"] = args["time_start"] + args["max_generate_audio_seconds"]
print(args)
return args
def music_generation(args):
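    """Build an InspireMusicUnified model from the args dict and run inference; return the output audio path."""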
set_env_variables()
model = InspireMusicUnified(
model_name=args["model_name"],
model_dir=args["model_dir"],
min_generate_audio_seconds=args["min_generate_audio_seconds"],
max_generate_audio_seconds=args["max_generate_audio_seconds"],
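        # Model-side sampling rate; the final audio uses output_sample_rate below.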
sample_rate=24000,
output_sample_rate=args["output_sample_rate"],
load_jit=True,
load_onnx=False,
fast=args["fast"],
result_dir=args["result_dir"])
output_path = model.inference(
task=args["task"],
text=args["text"],
audio_prompt=args["audio_prompt"],
chorus=args["chorus"],
time_start=args["time_start"],
time_end=args["time_end"],
output_fn=args["output_fn"],
max_audio_prompt_length=args["max_audio_prompt_length"],
fade_out_duration=args["fade_out_duration"],
output_format=args["format"],
fade_out_mode=args["fade_out"],
trim=args["trim"])
return output_path

def update_text():
    # Unused development helper: rebinding this module-level name would not
    # update the Gradio Textbox component assigned to text_input below.
    global text_input
    text_input = "New value set by button click"
    return text_input
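
# Example prompts rendered as one-click buttons in the UI.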
default_prompts = [
"Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.",
"Relaxing evening tune",
"Upbeat workout track",
"Soothing meditation soundscape",
"Joyful dance rhythm"
]

def cut_audio(audio_file, cut_seconds=5):
    # Trim the prompt audio to its first `cut_seconds` seconds and save it as a wav.
    audio, sr = torchaudio.load(audio_file)
    num_samples = int(cut_seconds * sr)
    trimmed_audio = audio[:, :num_samples]
    output_path = os.path.join(os.getcwd(), "audio_prompt_" + generate_filename() + ".wav")
    torchaudio.save(output_path, trimmed_audio, sr)
    return output_path

def run_text2music(text, model_name, chorus, fast, fade_out,
                   output_sample_rate, max_generate_audio_seconds):
    # Text-to-Music: generate audio from the text prompt alone.
    args = get_args(
        task='text-to-music', text=text, audio=None,
        model_name=model_name, chorus=chorus, fast=fast,
        fade_out=fade_out, output_sample_rate=output_sample_rate,
        max_generate_audio_seconds=max_generate_audio_seconds)
    return music_generation(args)

def run_continuation(text, audio, model_name, chorus, fast, fade_out,
                     output_sample_rate, max_generate_audio_seconds):
    # Music Continuation: extend a short audio prompt, optionally guided by text.
    if audio is None:
        raise gr.Error("Please upload a prompt audio clip for Music Continuation.")
    args = get_args(
        task='continuation', text=text, audio=cut_audio(audio, cut_seconds=5),
        model_name=model_name, chorus=chorus, fast=fast,
        fade_out=fade_out, output_sample_rate=output_sample_rate,
        max_generate_audio_seconds=max_generate_audio_seconds)
    return music_generation(args)
with gr.Blocks() as demo:
    gr.Markdown("""
    # InspireMusic
    Generate music with InspireMusic models using two tasks: Text-to-Music and Music Continuation.
    """)
with gr.Row():
model_name = gr.Dropdown(["InspireMusic-1.5B-Long", "InspireMusic-1.5B", "InspireMusic-1.5B-24kHz", "InspireMusic-Base", "InspireMusic-Base-24kHz"], label="Select Model Name", value="InspireMusic-Base")
chorus = gr.Dropdown(["intro", "verse", "chorus", "outro"],
label="Chorus Mode", value="intro")
output_sample_rate = gr.Dropdown([48000, 24000],
label="Output Audio Sample Rate (Hz)",
value=48000)
max_generate_audio_seconds = gr.Slider(10, 300,
label="Max Generated Audio Length (Seconds)",
value=30)
with gr.Column():
fast = gr.Checkbox(label="Fast Inference", value=False)
fade_out = gr.Checkbox(label="Apply Fade Out Effect", value=True)
# Textbox for custom input
text_input = gr.Textbox(label="Input Text (For Text-to-Music Task)", value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
audio_input = gr.Audio(label="Input Prompt Audio (For Music Continuation Task)",
type="filepath")
music_output = gr.Audio(label="Generated Music Result", type="filepath")
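    # Task buttons: each runs the full pipeline and writes the result to music_output.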
with gr.Row():
button = gr.Button("Text to Music")
button.click(run_text2music,
inputs=[text_input, model_name,
chorus, fast, fade_out,
output_sample_rate,
max_generate_audio_seconds],
outputs=music_output)
generate_button = gr.Button("Music Continuation")
generate_button.click(run_continuation,
inputs=[text_input, audio_input, model_name,
chorus, fast, fade_out,
output_sample_rate,
max_generate_audio_seconds],
outputs=music_output)
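    # One-click example prompts; each button runs the Text-to-Music task directly.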
with gr.Column():
default_prompt_buttons = []
for prompt in default_prompts:
button = gr.Button(value=prompt)
button.click(run_text2music,
inputs = [text_input, model_name,
chorus, fast, fade_out,
output_sample_rate,
max_generate_audio_seconds],
outputs = music_output)
default_prompt_buttons.append(button)
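
# On Hugging Face Spaces the app is typically run as a script; locally,
# demo.launch() starts the server (Gradio's share=True option gives a public link).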
if __name__ == "__main__":
    demo.launch()