Spaces:
Running
on
L40S
Running
on
L40S
hainazhu
committed on
Commit
·
8e684f6
1
Parent(s):
b3a289b
prompt audio & advanced cfg
Browse files
app.py
CHANGED
@@ -6,6 +6,7 @@ from datetime import datetime
|
|
6 |
import os
|
7 |
import sys
|
8 |
import librosa
|
|
|
9 |
import os.path as op
|
10 |
APP_DIR = op.dirname(op.abspath(__file__))
|
11 |
|
@@ -56,23 +57,35 @@ R&B的节奏奏响.
|
|
56 |
|
57 |
|
58 |
# 模拟歌曲生成函数
|
59 |
-
def generate_song(description, lyric, prompt_audio=None):
|
60 |
global model
|
61 |
-
|
|
|
|
|
|
|
62 |
print(f"Generating song with description: {description}")
|
63 |
print(f"Lyrics provided: {lyric}")
|
64 |
if prompt_audio is not None:
|
65 |
print("Using prompt audio for generation")
|
|
|
|
|
66 |
|
67 |
sample_rate = model.cfg.sample_rate
|
|
|
|
|
|
|
68 |
|
69 |
-
audio_data = model(lyric, description,
|
|
|
|
|
70 |
|
71 |
# 创建输入配置的JSON
|
72 |
input_config = {
|
73 |
"description": description,
|
74 |
"lyric": lyric,
|
75 |
-
"
|
|
|
|
|
76 |
"timestamp": datetime.now().isoformat(),
|
77 |
}
|
78 |
|
@@ -108,11 +121,33 @@ with gr.Blocks(title="LeVo Demo Space") as demo:
|
|
108 |
elem_id="audio-prompt"
|
109 |
)
|
110 |
with gr.Tab("Advanced Config"):
|
111 |
-
|
112 |
-
|
113 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
)
|
115 |
-
|
116 |
generate_btn = gr.Button("Generate Song", variant="primary")
|
117 |
|
118 |
with gr.Column():
|
@@ -141,7 +176,7 @@ with gr.Blocks(title="LeVo Demo Space") as demo:
|
|
141 |
# 生成按钮点击事件
|
142 |
generate_btn.click(
|
143 |
fn=generate_song,
|
144 |
-
inputs=[description, lyric, prompt_audio],
|
145 |
outputs=[output_audio, output_json]
|
146 |
)
|
147 |
|
|
|
6 |
import os
|
7 |
import sys
|
8 |
import librosa
|
9 |
+
import time
|
10 |
import os.path as op
|
11 |
APP_DIR = op.dirname(op.abspath(__file__))
|
12 |
|
|
|
57 |
|
58 |
|
59 |
# 模拟歌曲生成函数
|
60 |
+
def generate_song(description, lyric, prompt_audio=None, cfg_coef=None, temperature=None, top_k=None, progress=gr.Progress(track_tqdm=True)):
|
61 |
global model
|
62 |
+
params = {'cfg_coef':cfg_coef, 'temperature':temperature, 'top_k':top_k}
|
63 |
+
params = {k:v for k,v in params.items() if v is not None}
|
64 |
+
|
65 |
+
# 生成过程
|
66 |
print(f"Generating song with description: {description}")
|
67 |
print(f"Lyrics provided: {lyric}")
|
68 |
if prompt_audio is not None:
|
69 |
print("Using prompt audio for generation")
|
70 |
+
else:
|
71 |
+
prompt_audio = op.join(APP_DIR, 'sample/19_2-又是一天过去,烦恼如影随形10s.wav')
|
72 |
|
73 |
sample_rate = model.cfg.sample_rate
|
74 |
+
|
75 |
+
progress(0.0, "Start Generation")
|
76 |
+
start = time.time()
|
77 |
|
78 |
+
audio_data = model(lyric, description, prompt_audio, params).cpu().permute(1, 0).float().numpy()
|
79 |
+
|
80 |
+
end = time.time()
|
81 |
|
82 |
# 创建输入配置的JSON
|
83 |
input_config = {
|
84 |
"description": description,
|
85 |
"lyric": lyric,
|
86 |
+
"prompt_audio": prompt_audio,
|
87 |
+
"params": params,
|
88 |
+
"inference_duration": end - start,
|
89 |
"timestamp": datetime.now().isoformat(),
|
90 |
}
|
91 |
|
|
|
121 |
elem_id="audio-prompt"
|
122 |
)
|
123 |
with gr.Tab("Advanced Config"):
|
124 |
+
cfg_coef = gr.Slider(
|
125 |
+
label="CFG Coefficient",
|
126 |
+
minimum=0.1,
|
127 |
+
maximum=3.0,
|
128 |
+
step=0.1,
|
129 |
+
value=1.5,
|
130 |
+
interactive=True,
|
131 |
+
elem_id="cfg-coef",
|
132 |
+
)
|
133 |
+
temperature = gr.Slider(
|
134 |
+
label="Temperature",
|
135 |
+
minimum=0.1,
|
136 |
+
maximum=2.0,
|
137 |
+
step=0.1,
|
138 |
+
value=1.0,
|
139 |
+
interactive=True,
|
140 |
+
elem_id="temperature",
|
141 |
+
)
|
142 |
+
top_k = gr.Slider(
|
143 |
+
label="Top-K",
|
144 |
+
minimum=1,
|
145 |
+
maximum=100,
|
146 |
+
step=1,
|
147 |
+
value=50,
|
148 |
+
interactive=True,
|
149 |
+
elem_id="top_k",
|
150 |
)
|
|
|
151 |
generate_btn = gr.Button("Generate Song", variant="primary")
|
152 |
|
153 |
with gr.Column():
|
|
|
176 |
# 生成按钮点击事件
|
177 |
generate_btn.click(
|
178 |
fn=generate_song,
|
179 |
+
inputs=[description, lyric, prompt_audio, cfg_coef, temperature, top_k],
|
180 |
outputs=[output_audio, output_json]
|
181 |
)
|
182 |
|