SongGeneration / app.py
waytan22's picture
add auto prompt and interface
d658154
raw
history blame
9.18 kB
import gradio as gr
import json
from datetime import datetime
import yaml
import time
import os.path as op
from download import download_model
from levo_inference import LeVoInference
# Download the model files into the application directory.
_THIS_FILE = op.abspath(__file__)
APP_DIR = op.dirname(_THIS_FILE)
download_model(APP_DIR)
print("Successful downloaded model.")

# Initialise the inference wrapper from the checkpoint folder under APP_DIR.
_CKPT_DIR = op.join(APP_DIR, "ckpt/songgeneration_base_zn/")
MODEL = LeVoInference(_CKPT_DIR)
# Default lyrics shown in the UI, grouped by section: each tuple starts with
# its structure tag, followed by the lyric lines of that section (if any).
_EXAMPLE_SECTIONS = (
    ("[intro-short]",),
    (
        "[verse]",
        "雪花舞动在无尽的天际",
        "情缘如同雪花般轻轻逝去",
        "希望与真挚",
        "永不磨灭",
        "你的忧虑",
        "随风而逝",
    ),
    (
        "[chorus]",
        "我怀抱着守护这片梦境",
        "在这世界中寻找爱与虚幻",
        "苦辣酸甜",
        "我们一起品尝",
        "在雪的光芒中",
        "紧紧相拥",
    ),
    ("[inst-short]",),
    (
        "[verse]",
        "雪花再次在风中飘扬",
        "情愿如同雪花般消失无踪",
        "希望与真挚",
        "永不消失",
        "在痛苦与喧嚣中",
        "你找到解脱",
    ),
    (
        "[chorus]",
        "我环绕着守护这片梦境",
        "在这世界中感受爱与虚假",
        "苦辣酸甜",
        "我们一起分享",
        "在白银的光芒中",
        "我们同在",
    ),
    ("[outro-short]",),
)

# One line per entry, newline-separated, with no leading or trailing whitespace.
EXAMPLE_LYRICS = "\n".join(
    text for section in _EXAMPLE_SECTIONS for text in section
)
# Load the structure vocabulary from conf/vocab.yaml; generate_song iterates
# over it to find the tags whose trailing newline should become a space.
_VOCAB_PATH = op.join(APP_DIR, 'conf/vocab.yaml')
with open(_VOCAB_PATH, 'r', encoding='utf-8') as vocab_file:
    STRUCTS = yaml.safe_load(vocab_file)
def _format_lyric(lyric, structs):
    """Flatten multi-line lyrics into the single-line form passed to the model.

    Args:
        lyric: Raw lyrics text as typed by the user.
        structs: Iterable of structure-tag strings; a tag directly followed by
            a newline is joined to the next line with a space.

    Returns:
        The lyrics with sections separated by " ; " and lines by ".".
    """
    # Normalise CRLF / CR line endings first; otherwise none of the
    # newline-based rules below would match text pasted with "\r\n".
    lyric = lyric.replace("\r\n", "\n").replace("\r", "\n")
    # Bare tags are accepted as shorthand for their "-short" variants.
    for bare_tag, short_tag in (
        ("[intro]", "[intro-short]"),
        ("[inst]", "[inst-short]"),
        ("[outro]", "[outro-short]"),
    ):
        lyric = lyric.replace(bare_tag, short_tag)
    # A blank line separates sections.
    lyric = lyric.replace("\n\n", " ; ")
    # Keep each structure tag on the same "line" as the text that follows it.
    for tag in structs:
        lyric = lyric.replace(f"{tag}\n", f"{tag} ")
    # Remaining newlines separate lyric lines within a section.
    lyric = lyric.replace("\n", ".")
    # Drop the period that would otherwise precede a section separator.
    return lyric.replace(". ; ", " ; ")


# Song generation callback for the Gradio UI
def generate_song(lyric, description=None, prompt_audio=None, genre=None, cfg_coef=None, temperature=None, top_k=None, progress=gr.Progress(track_tqdm=True)):
    """Generate a song from lyrics and an optional prompt.

    Prompt priority: an audio prompt overrides both the text description and
    the genre; otherwise a non-empty description overrides the genre.

    Args:
        lyric: Lyrics text with structure tags.
        description: Optional free-text description of the song.
        prompt_audio: Optional audio prompt (the UI supplies a file path).
        genre: Optional genre name selected in the UI.
        cfg_coef: Optional sampling parameter forwarded to the model.
        temperature: Optional sampling parameter forwarded to the model.
        top_k: Optional sampling parameter forwarded to the model.
        progress: Gradio progress tracker.

    Returns:
        A tuple ``((sample_rate, audio_data), config_json)`` where
        ``config_json`` is a JSON string describing the inputs actually used.
    """
    # Forward only the sampling parameters that were explicitly provided.
    params = {
        key: value
        for key, value in (
            ('cfg_coef', cfg_coef),
            ('temperature', temperature),
            ('top_k', top_k),
        )
        if value is not None
    }
    sample_rate = MODEL.cfg.sample_rate

    # Adapt the lyric format
    lyric = _format_lyric(lyric, STRUCTS)

    # Adapt the prompt: keep only the highest-priority prompt source.
    if prompt_audio is not None:
        genre = None
        description = None
    elif description is not None and description != "":
        genre = None

    progress(0.0, "Start Generation")
    # perf_counter is monotonic, so the measured duration is not affected by
    # system clock adjustments.
    start = time.perf_counter()
    generated = MODEL(lyric, description, prompt_audio, genre, op.join(APP_DIR, "ckpt/prompt.pt"), params)
    # NOTE(review): presumably a 2-D torch tensor; permute(1, 0) swaps its two
    # axes before conversion to a NumPy array — confirm the expected layout.
    audio_data = generated.cpu().permute(1, 0).float().numpy()
    end = time.perf_counter()

    # Build the JSON summary of the effective inputs
    input_config = {
        "lyric": lyric,
        "genre": genre,
        "prompt_audio": prompt_audio,
        "description": description,
        "params": params,
        "inference_duration": end - start,
        "timestamp": datetime.now().isoformat(),
    }
    print(input_config)

    return (sample_rate, audio_data), json.dumps(input_config, indent=2)
# Build the Gradio interface.
# NOTE(review): the layout nesting below was reconstructed from the
# context-manager structure; confirm it matches the intended layout.
with gr.Blocks(title="SongGeneration Demo Space") as demo:
    gr.Markdown("# 🎵 SongGeneration Demo Space")
    gr.Markdown("Demo interface for the song generation model. Provide lyrics, and optionally an audio or text prompt, to generate a custom song.")

    with gr.Row():
        # Left column: inputs.
        with gr.Column():
            lyric = gr.Textbox(
                label="Lyrics",
                lines=5,
                max_lines=15,
                value=EXAMPLE_LYRICS,
                info="Support lyric structure tags like [verse], [chorus], and [bridge] to separate different parts of the lyrics. Use [intro] [outro] [inst] to generate instrumental music.",
                placeholder="""Lyric Format
'''
[structure tag]
lyrics
[structure tag]
lyrics
'''
1. One paragraph represents one section, starting with a structure tag and ending with a blank line
2. One line represents one lyric line, punctuation is not recommended inside the line
3. Structure tags can be chosen from the following list
- '[verse]'
- '[chorus]'
- '[bridge]'
- '[intro-short]'
- '[intro-medium]'
- '[intro-long]'
- '[outro-short]'
- '[outro-medium]'
- '[outro-long]'
- '[inst-short]'
- '[inst-medium]'
- '[inst-long]'
- '[silence]'
"""
            )

            # One tab per prompt source; generate_song decides which one wins.
            with gr.Tabs(elem_id="extra-tabs"):
                with gr.Tab("Genre Select"):
                    genre = gr.Radio(
                        choices=["Auto", "Pop", "R&B", "Dance", "Jazz", "Folk", "Rock", "Chinese Style", "Chinese Tradition", "Metal", "Reggae", "Chinese Opera"],
                        label="Genre Select(Optional)",
                        value="Auto",  # first choice is selected by default
                        interactive=True,
                        elem_id="single-select-radio"  # hook for custom styling
                    )
                with gr.Tab("Audio Prompt"):
                    prompt_audio = gr.Audio(
                        label="Prompt Audio (Optional)",
                        type="filepath",
                        elem_id="audio-prompt"
                    )
                with gr.Tab("Text Prompt"):
                    description = gr.Textbox(
                        label="Song Description (Optional)",
                        info="Describe the gender, timbre, genre, emotion, instrument and bpm of the song",
                        placeholder="female, dark, pop, sad, piano and drums, the bpm is 125.",
                        lines=1,
                        max_lines=2
                    )

            # Sampling parameters forwarded to generate_song.
            with gr.Accordion("Advanced Config", open=False):
                cfg_coef = gr.Slider(
                    label="CFG Coefficient",
                    minimum=0.1,
                    maximum=3.0,
                    step=0.1,
                    value=1.5,
                    interactive=True,
                    elem_id="cfg-coef",
                )
                temperature = gr.Slider(
                    label="Temperature",
                    minimum=0.1,
                    maximum=2.0,
                    step=0.1,
                    value=0.9,
                    interactive=True,
                    elem_id="temperature",
                )
                top_k = gr.Slider(
                    label="Top-K",
                    minimum=1,
                    maximum=100,
                    step=1,
                    value=50,
                    interactive=True,
                    elem_id="top_k",
                )
            generate_btn = gr.Button("Generate Song", variant="primary")

        # Right column: outputs.
        with gr.Column():
            output_audio = gr.Audio(label="Generated Song", type="numpy")
            output_json = gr.JSON(label="Input Configuration")

    # # Example buttons (currently disabled)
    # examples = gr.Examples(
    # examples=[
    # ["male, bright, rock, happy, electric guitar and drums, the bpm is 150."],
    # ["female, warm, jazz, romantic, synthesizer and piano, the bpm is 100."]
    # ],
    # inputs=[description],
    # label="Text Prompt examples"
    # )
    # examples = gr.Examples(
    # examples=[
    # "[intro-medium]\n\n[verse]\n在这个疯狂的世界里\n谁不渴望一点改变\n在爱情面前\n我们都显得那么不安全\n你紧紧抱着我\n告诉我再靠近一点\n别让这璀璨的夜晚白白浪费\n我那迷茫的眼睛\n看不见未来的路\n在情感消散之前\n我们对爱的渴望永不熄灭\n你给我留下一句誓言\n想知道我们的爱是否能持续到永远\n[chorus]\n\n约定在那最后的夜晚\n不管命运如何摆布\n我们的心是否依然如初\n我会穿上红衬衫\n带着摇滚的激情\n回到我们初遇的地方\n约定在那最后的夜晚\n就算全世界都变了样\n我依然坚守诺言\n铭记这一天\n你永远是我心中的爱恋\n\n[outro-medium]\n",
    # "[intro-short]\n\n[verse]\nThrough emerald canyons where fireflies dwell\nCerulean berries kiss morning's first swell\nCrystalline dew crowns each Vitamin Dawn's confection dissolves slowly on me\nAmbrosia breezes through honeycomb vines\nNature's own candy in Fibonacci lines\n[chorus] Blueberry fruit so sweet\n takes you higher\n can't be beat\n In your lungs\n it starts to swell\n You're under its spell\n [verse] Resin of sunlight in candied retreat\nMarmalade moonbeams melt under bare feet\nNectar spirals bloom chloroplast champagne\nPhotosynthesis sings through my veins\nChlorophyll rhythms pulse warm in my blood\nThe forest's green pharmacy floods every bud[chorus] Blueberry fruit so sweet\n takes you higher\n can't be beat\n In your lungs\n it starts to swell\n You're under its spell\n feel the buzz\n ride the wave\n Limey me\n blueberry\n your mind's enslaved\n In the haze\n lose all time\n floating free\n feeling fine\n Blueberry\n fruit so sweet\n takes you higher\n can't be beat\n In your lungs\n it starts to swell\n cry\n You're under its spell\n\n[outro-short]\n",
    # ],
    # inputs=[lyric],
    # label="Lyrics examples",
    # )

    # Wire the generate button: the input order must match the positional
    # parameters of generate_song.
    generate_btn.click(
        fn=generate_song,
        inputs=[lyric, description, prompt_audio, genre, cfg_coef, temperature, top_k],
        outputs=[output_audio, output_json]
    )
# Launch the app when this file is run as a script.
if __name__ == "__main__":
    # Listen on all network interfaces, port 7860.
    _launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.launch(**_launch_options)