import os
import gradio as gr
from inspiremusic.cli.inference import InspireMusicUnified, set_env_variables
import torchaudio
import datetime
import hashlib

def generate_filename():
    now = datetime.datetime.now()
    seconds_since_epoch = int(now.timestamp())
    # Convert seconds to string
    seconds_str = str(seconds_since_epoch)
    # Hash the string using SHA-256
    hash_object = hashlib.sha256(seconds_str.encode())
    hash_string = hash_object.hexdigest()
    return hash_string
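
# Illustration: generate_filename() yields the 64-character SHA-256 hex digest
# of the current Unix timestamp, so two calls within the same second collide.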

def get_args(
        task, text="", audio=None, model_name="InspireMusic-Base",
        chorus="intro", output_sample_rate=48000,
        max_generate_audio_seconds=30.0, time_start=0.0, time_end=30.0,
        trim=False):
    # Only the 24 kHz output setting uses fast inference.
    fast = output_sample_rate == 24000
    # Construct the arguments required by InspireMusic.
    args = {
        "task": task,
        "text": text,
        "audio_prompt": audio,
        "model_name": model_name,
        "chorus": chorus,
        "fast": fast,
        "fade_out": True,
        "trim": trim,
        "output_sample_rate": output_sample_rate,
        "min_generate_audio_seconds": 10.0,
        "max_generate_audio_seconds": max_generate_audio_seconds,
        # Assumed default: music_generation() reads this key, but the original
        # dict omitted it. 5.0 s matches the cut made by cut_audio() below.
        "max_audio_prompt_length": 5.0,
        "model_dir": os.path.join("pretrained_models", model_name),
        "result_dir": "exp/inspiremusic",
        "output_fn": generate_filename(),
        "format": "wav",
        "time_start": time_start,
        "time_end": time_end,
        "fade_out_duration": 1.0,
    }

    if args["time_start"] is None:
        args["time_start"] = 0.0
    args["time_end"] = args["time_start"] + args["max_generate_audio_seconds"]
    print(args)
    return args
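
# Sketch of a typical call (illustrative prompt text, not one of the UI defaults):
#   example_args = get_args(
#       task="text-to-music",
#       text="A calm ambient piece with soft synth pads.",
#       output_sample_rate=48000,
#       max_generate_audio_seconds=30.0)
#   # example_args["fast"] is False (48 kHz path) and example_args["time_end"]
#   # is recomputed to time_start + max_generate_audio_seconds = 30.0.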

def music_generation(args):
    set_env_variables()
    model = InspireMusicUnified(
        model_name=args["model_name"],
        model_dir=args["model_dir"],
        min_generate_audio_seconds=args["min_generate_audio_seconds"],
        max_generate_audio_seconds=args["max_generate_audio_seconds"],
        sample_rate=24000,
        output_sample_rate=args["output_sample_rate"],
        load_jit=True,
        load_onnx=False,
        fast=args["fast"],
        result_dir=args["result_dir"])
    output_path = model.inference(
        task=args["task"],
        text=args["text"],
        audio_prompt=args["audio_prompt"],
        chorus=args["chorus"],
        time_start=args["time_start"],
        time_end=args["time_end"],
        output_fn=args["output_fn"],
        max_audio_prompt_length=args["max_audio_prompt_length"],
        fade_out_duration=args["fade_out_duration"],
        output_format=args["format"],
        fade_out_mode=args["fade_out"],
        trim=args["trim"])
    return output_path
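
# Sketch of an end-to-end call outside the Gradio UI. Assumes the selected
# checkpoint has already been downloaded to pretrained_models/InspireMusic-Base:
#   path = music_generation(get_args(
#       task="text-to-music",
#       text="Compose an uplifting R&B song."))
#   # `path` points at exp/inspiremusic/<sha256-hash>.wav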

def update_text():
    # Note: not wired to any UI event below; kept for reference.
    global text_input  # Declare as global to modify the outer-scope variable.
    text_input = "New value set by button click"
    return text_input

default_prompts = [
    "Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.",
    "Compose an uplifting R&B song.",
    "Create an emotional, introspective folk song with acoustic guitar and soft vocals.",
]

def cut_audio(audio_file, cut_seconds=5):
    # Keep only the first cut_seconds of the uploaded audio as the prompt.
    audio, sr = torchaudio.load(audio_file)
    num_samples = cut_seconds * sr
    cut_waveform = audio[:, :num_samples]
    output_path = os.path.join(os.getcwd(), "audio_prompt_" + generate_filename() + ".wav")
    torchaudio.save(output_path, cut_waveform, sr)
    return output_path
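
# Sketch: trimming a local file to a 5 s continuation prompt. "my_song.wav" is
# a placeholder path, not a file shipped with this demo:
#   prompt_path = cut_audio("my_song.wav", cut_seconds=5)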

def run_text2music(text, model_name, chorus,
                   output_sample_rate, max_generate_audio_seconds):
    # Text-to-music: generate purely from the text prompt (the task strings in
    # the two handlers were originally swapped).
    args = get_args(
        task='text-to-music', text=text, audio=None,
        model_name=model_name, chorus=chorus,
        output_sample_rate=output_sample_rate,
        max_generate_audio_seconds=max_generate_audio_seconds)
    return music_generation(args)

def run_continuation(text, audio, model_name, chorus,
                     output_sample_rate, max_generate_audio_seconds):
    # Continuation: condition on the first 5 s of the uploaded audio prompt.
    args = get_args(
        task='continuation', text=text, audio=cut_audio(audio, cut_seconds=5),
        model_name=model_name, chorus=chorus,
        output_sample_rate=output_sample_rate,
        max_generate_audio_seconds=max_generate_audio_seconds)
    return music_generation(args)

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # InspireMusic
    - Supports text-to-music, music continuation, audio super-resolution, and audio reconstruction tasks with high audio quality, with available sampling rates of 24 kHz and 48 kHz.
    - Supports long audio generation in multiple output audio formats, i.e., wav, flac, mp3, m4a.
    - Open-source [InspireMusic-Base](https://modelscope.cn/models/iic/InspireMusic/summary), [InspireMusic-Base-24kHz](https://modelscope.cn/models/iic/InspireMusic-Base-24kHz/summary), [InspireMusic-1.5B](https://modelscope.cn/models/iic/InspireMusic-1.5B/summary), [InspireMusic-1.5B-24kHz](https://modelscope.cn/models/iic/InspireMusic-1.5B-24kHz/summary), and [InspireMusic-1.5B-Long](https://modelscope.cn/models/iic/InspireMusic-1.5B-Long/summary) models for music generation.
    - Currently only English text prompts are supported.
    """)

    with gr.Row(equal_height=True):
        model_name = gr.Dropdown(
            ["InspireMusic-1.5B-Long", "InspireMusic-1.5B",
             "InspireMusic-1.5B-24kHz", "InspireMusic-Base",
             "InspireMusic-Base-24kHz"],
            label="Select Model Name", value="InspireMusic-Base")
        chorus = gr.Dropdown(["intro", "verse", "chorus", "outro"],
                             label="Chorus Mode", value="intro")
        output_sample_rate = gr.Dropdown([48000, 24000],
                                         label="Output Audio Sample Rate (Hz)",
                                         value=48000)
        max_generate_audio_seconds = gr.Slider(10, 120,
                                               label="Generate Audio Length (s)",
                                               value=30)
    # with gr.Column():
    #     fast = gr.Checkbox(label="Fast Inference", value=False)
    #     fade_out = gr.Checkbox(label="Apply Fade Out Effect", value=True)

    with gr.Row(equal_height=True):
        # Textbox for the text-to-music prompt.
        text_input = gr.Textbox(
            label="Input Text (For Text-to-Music Task)",
            value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
        audio_input = gr.Audio(
            label="Input Audio Prompt (For Music Continuation Task)",
            type="filepath")
        music_output = gr.Audio(label="Generated Music", type="filepath")

    with gr.Row():
        text2music_button = gr.Button("Text to Music")
        text2music_button.click(
            run_text2music,
            inputs=[text_input, model_name, chorus,
                    output_sample_rate, max_generate_audio_seconds],
            outputs=music_output)
        continuation_button = gr.Button("Music Continuation")
        continuation_button.click(
            run_continuation,
            inputs=[text_input, audio_input, model_name, chorus,
                    output_sample_rate, max_generate_audio_seconds],
            outputs=music_output)

    with gr.Column():
        default_prompt_buttons = []
        for prompt in default_prompts:
            prompt_button = gr.Button(value=prompt)
            # Copy this button's prompt into the textbox first (the default
            # argument binds the current loop value), then generate from the
            # textbox; previously the button's own prompt was never used.
            prompt_button.click(lambda p=prompt: p, outputs=text_input).then(
                run_text2music,
                inputs=[text_input, model_name, chorus,
                        output_sample_rate, max_generate_audio_seconds],
                outputs=music_output)
            default_prompt_buttons.append(prompt_button)

if __name__ == "__main__":
    demo.launch()