Spaces:
Running
on
Zero
Running
on
Zero
File size: 7,112 Bytes
b93c24a 37b6092 96fe5d9 ff944f7 37b6092 b93c24a f107ce2 9af664a 1fc806b 9af664a b93c24a 9af664a b93c24a 1343135 b93c24a 4e5c199 b93c24a 37b6092 b93c24a 1343135 b93c24a 1343135 1fc806b 1343135 37b6092 b93c24a 1343135 b93c24a 1343135 37a3fc6 1343135 b93c24a 1343135 b93c24a 1343135 b93c24a 1343135 b93c24a 1343135 4543aed 319e13c 1343135 b93c24a 9af664a 698dade fd194a3 9af664a 698dade 9af664a a55649b b93c24a fd194a3 9af664a b93c24a 65d160d b93c24a 1343135 6eb0a02 b93c24a 65d160d cc6f94a b93c24a cc6f94a 032bd6f b93c24a 5891900 b93c24a 5891900 9af664a 65d160d 0e88d4d b93c24a 5891900 0e88d4d 5891900 1a0c7fe 032bd6f 4543aed fd194a3 9af664a 698dade 4543aed 698dade fd194a3 9af664a b93c24a 96fe5d9 1a0c7fe 9af664a 1a0c7fe 96fe5d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
import os
import sys
import gradio as gr
from inspiremusic.cli.inference import InspireMusicUnified, set_env_variables
import torchaudio
import datetime
import hashlib
def generate_filename():
    """Return a pseudo-unique filename stem.

    The stem is the SHA-256 hex digest of the current epoch second, so
    calls within the same second produce the same name.
    """
    epoch_second = int(datetime.datetime.now().timestamp())
    digest = hashlib.sha256(str(epoch_second).encode())
    return digest.hexdigest()
def get_args(
        task, text="", audio=None, model_name="InspireMusic-Base",
        chorus="intro",
        output_sample_rate=48000, max_generate_audio_seconds=30.0,
        time_start=0.0, time_end=30.0, trim=False):
    """Assemble the argument dict consumed by music_generation().

    Args:
        task: inference task name, e.g. "text-to-music" or "continuation".
        text: text prompt describing the music to generate.
        audio: optional path to an audio prompt file (continuation task).
        model_name: model folder name under pretrained_models/.
        chorus: structure tag for the generated segment
            (intro/verse/chorus/outro).
        output_sample_rate: output rate in Hz; 24000 enables fast mode.
        max_generate_audio_seconds: upper bound on generated length (s).
        time_start: start of the generation window (s); None is coerced to 0.
        time_end: end of the generation window (s).
        trim: whether to trim the generated audio.

    Returns:
        dict of settings for InspireMusicUnified / model.inference().
    """
    # 24 kHz output selects the fast inference path (no 48 kHz stage).
    fast = output_sample_rate == 24000
    args = {
        "task": task,
        "text": text,
        "audio_prompt": audio,
        "model_name": model_name,
        "chorus": chorus,
        "fast": fast,
        "fade_out": True,
        "trim": trim,
        "output_sample_rate": output_sample_rate,
        "min_generate_audio_seconds": 10.0,
        "max_generate_audio_seconds": max_generate_audio_seconds,
        "model_dir": os.path.join("pretrained_models", model_name),
        "result_dir": "exp/inspiremusic",
        "output_fn": generate_filename(),
        "format": "wav",
        "time_start": time_start,
        "time_end": time_end,
        # Fix: music_generation() reads this key, but it was never set
        # here, which raised KeyError at inference time. 5.0 s is the
        # presumed default prompt length — TODO confirm against the CLI.
        "max_audio_prompt_length": 5.0,
        "fade_out_duration": 1.0,
    }
    # A missing start time defaults to 0 and re-derives the window end
    # from the maximum generation length.
    if args["time_start"] is None:
        args["time_start"] = 0.0
        args["time_end"] = args["time_start"] + args["max_generate_audio_seconds"]
    print(args)
    return args
def music_generation(args):
    """Instantiate the InspireMusic model described by *args* and run inference.

    Args:
        args: settings dict as produced by get_args().

    Returns:
        The output path returned by model.inference() for the generated audio.
    """
    set_env_variables()
    model = InspireMusicUnified(
        model_name=args["model_name"],
        model_dir=args["model_dir"],
        min_generate_audio_seconds=args["min_generate_audio_seconds"],
        max_generate_audio_seconds=args["max_generate_audio_seconds"],
        sample_rate=24000,  # presumably the model-native rate — TODO confirm
        output_sample_rate=args["output_sample_rate"],
        load_jit=True,
        load_onnx=False,
        fast=args["fast"],
        result_dir=args["result_dir"])
    output_path = model.inference(
        task=args["task"],
        text=args["text"],
        audio_prompt=args["audio_prompt"],
        chorus=args["chorus"],
        time_start=args["time_start"],
        time_end=args["time_end"],
        output_fn=args["output_fn"],
        # Fix: get_args() historically never set this key, so the plain
        # args[...] lookup raised KeyError; fall back to a 5.0 s prompt.
        max_audio_prompt_length=args.get("max_audio_prompt_length", 5.0),
        fade_out_duration=args["fade_out_duration"],
        output_format=args["format"],
        fade_out_mode=args["fade_out"],
        trim=args["trim"])
    return output_path
def update_text():
    """Overwrite the module-level text_input with a fixed marker and return it."""
    global text_input  # rebind the module-level name, not a local
    text_input = "New value set by button click"
    return text_input
# Example text prompts surfaced as one-click buttons in the UI below.
default_prompts = [
    "Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.",
    "Compose an uplifting R&B song.",
    "Create an emotional, introspective folk song with acoustic guitar and soft vocals."
]
def cut_audio(audio_file, cut_seconds=5):
    """Trim *audio_file* to its first *cut_seconds* seconds.

    The trimmed clip is written as a new wav file in the current working
    directory and its path is returned.
    """
    waveform, sample_rate = torchaudio.load(audio_file)
    trimmed = waveform[:, : cut_seconds * sample_rate]
    output_path = os.path.join(
        os.getcwd(), "audio_prompt_" + generate_filename() + ".wav")
    torchaudio.save(output_path, trimmed, sample_rate)
    return output_path
def run_text2music(text, model_name, chorus,
                   output_sample_rate, max_generate_audio_seconds):
    """Gradio handler for the "Text to Music" button.

    Fix: this handler previously passed task='continuation' (swapped with
    run_continuation) even though it supplies no audio prompt; a pure text
    prompt is the text-to-music task.

    Returns the generated audio file path from music_generation().
    """
    args = get_args(
        task='text-to-music', text=text, audio=None,
        model_name=model_name, chorus=chorus,
        output_sample_rate=output_sample_rate,
        max_generate_audio_seconds=max_generate_audio_seconds)
    return music_generation(args)
def run_continuation(text, audio, model_name, chorus,
                     output_sample_rate, max_generate_audio_seconds):
    """Gradio handler for the "Music Continuation" button.

    Fix: this handler previously passed task='text-to-music' (swapped with
    run_text2music) even though it feeds an audio prompt; continuing from a
    trimmed audio clip is the continuation task.

    Returns the generated audio file path from music_generation().
    """
    args = get_args(
        task='continuation', text=text, audio=cut_audio(audio, cut_seconds=5),
        model_name=model_name, chorus=chorus,
        output_sample_rate=output_sample_rate,
        max_generate_audio_seconds=max_generate_audio_seconds)
    return music_generation(args)
# --- Gradio UI wiring ------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Header / capability notes rendered at the top of the page.
    gr.Markdown("""
# InspireMusic
- Support text-to-music, music continuation, audio super-resolution, audio reconstruction tasks with high audio quality, with available sampling rates of 24kHz, 48kHz.
- Support long audio generation in multiple output audio formats, i.e., wav, flac, mp3, m4a.
- Open-source [InspireMusic-Base](https://modelscope.cn/models/iic/InspireMusic/summary), [InspireMusic-Base-24kHz](https://modelscope.cn/models/iic/InspireMusic-Base-24kHz/summary), [InspireMusic-1.5B](https://modelscope.cn/models/iic/InspireMusic-1.5B/summary), [InspireMusic-1.5B-24kHz](https://modelscope.cn/models/iic/InspireMusic-1.5B-24kHz/summary), [InspireMusic-1.5B-Long](https://modelscope.cn/models/iic/InspireMusic-1.5B-Long/summary) models for music generation.
- Currently only support English text prompts.
""")
    # Generation settings shared by both tasks.
    with gr.Row(equal_height=True):
        model_name = gr.Dropdown(["InspireMusic-1.5B-Long", "InspireMusic-1.5B", "InspireMusic-1.5B-24kHz", "InspireMusic-Base", "InspireMusic-Base-24kHz"], label="Select Model Name", value="InspireMusic-Base")
        chorus = gr.Dropdown(["intro", "verse", "chorus", "outro"],
                             label="Chorus Mode", value="intro")
        output_sample_rate = gr.Dropdown([48000, 24000],
                                         label="Output Audio Sample Rate (Hz)",
                                         value=48000)
        max_generate_audio_seconds = gr.Slider(10, 120,
                                               label="Generate Audio Length (s)",
                                               value=30)
    # with gr.Column():
    #     fast = gr.Checkbox(label="Fast Inference", value=False)
    #     fade_out = gr.Checkbox(label="Apply Fade Out Effect", value=True)
    # Inputs (text prompt, audio prompt) and the shared output player.
    with gr.Row(equal_height=True):
        # Textbox for custom input
        text_input = gr.Textbox(label="Input Text (For Text-to-Music Task)", value="Experience soothing and sensual instrumental jazz with a touch of Bossa Nova, perfect for a relaxing restaurant or spa ambiance.")
        audio_input = gr.Audio(label="Input Audio Prompt (For Music Continuation Task)",
                               type="filepath")
        music_output = gr.Audio(label="Generated Music", type="filepath")
    with gr.Row():
        # Text-to-music trigger: reads text_input only.
        button = gr.Button("Text to Music")
        button.click(run_text2music,
                     inputs=[text_input, model_name,
                             chorus,
                             output_sample_rate,
                             max_generate_audio_seconds],
                     outputs=music_output)
        # Continuation trigger: additionally reads the audio prompt.
        generate_button = gr.Button("Music Continuation")
        generate_button.click(run_continuation,
                              inputs=[text_input, audio_input, model_name,
                                      chorus,
                                      output_sample_rate,
                                      max_generate_audio_seconds],
                              outputs=music_output)
    # One-click example buttons, one per default prompt.
    # NOTE(review): each button's click reads text_input's current value,
    # not the prompt shown on the button — confirm that is intended.
    with gr.Column():
        default_prompt_buttons = []
        for prompt in default_prompts:
            button = gr.Button(value=prompt)
            button.click(run_text2music,
                         inputs=[text_input, model_name,
                                 chorus,
                                 output_sample_rate,
                                 max_generate_audio_seconds],
                         outputs=music_output)
            default_prompt_buttons.append(button)
# Start the Gradio server (blocking call).
demo.launch()
|