kevinwang676 committed on
Commit
c48371b
1 Parent(s): d8c3968

Create app_colab.py

Browse files
Files changed (1) hide show
  1. app_colab.py +301 -0
app_colab.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import re, os
2
+ import requests
3
+ import json
4
+ import torch
5
+
6
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Browser-like User-Agent header sent with the bilibili search request.
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
}

# Regex for a protocol-relative bilibili video link, running up to (but not
# including) the next double quote.
pattern = r'//www\.bilibili\.com/video[^"]*'
12
+
13
def get_bilibili_video_id(url):
    """Extract the video ID (e.g. a ``BV...`` code) from a bilibili video URL.

    Args:
        url: A URL (or URL fragment) containing a ``/video/<id>`` segment.

    Returns:
        The alphanumeric ID that follows ``/video/``.

    Raises:
        ValueError: If ``url`` contains no ``/video/<id>`` segment.
    """
    # The ID may be followed by "/", "?", "#" or the end of the string, so
    # links without a trailing slash (".../video/BV1xx?from=search") are
    # accepted as well as ".../video/BV1xx/".
    match = re.search(r'/video/([a-zA-Z0-9]+)(?=[/?#]|$)', url)
    if match is None:
        raise ValueError(f"No bilibili video ID found in URL: {url!r}")
    return match.group(1)
17
+
18
+ # Get bilibili audio
19
def find_first_appearance_with_neighborhood(text, pattern):
    """Return the first substring of ``text`` that matches ``pattern``.

    ``pattern`` is a regular expression handed to ``re.search``.  The whole
    matched text is returned, or ``None`` when nothing matches.
    """
    found = re.search(pattern, text)
    return found.group() if found else None
26
+
27
def search_bilibili(keyword):
    """Search bilibili and return the URL of the first video link found.

    Keywords starting with ``"BV"`` are searched with ``duration=1`` only;
    any other keyword is additionally restricted with ``tids=3`` and
    ``page=1``.

    Args:
        keyword: A BV id or free-text search terms.

    Returns:
        An ``https:`` URL built from the first bilibili video link on the
        result page.

    Raises:
        ValueError: If the result page contains no video link.
        requests.RequestException: If the HTTP request fails or times out.
    """
    query = {"keyword": keyword, "duration": 1}
    if not keyword.startswith("BV"):
        query.update({"tids": 3, "page": 1})

    # Passing the query through ``params`` lets requests percent-encode the
    # keyword; formatting it into the URL by hand breaks on '&', '#', etc.
    page = requests.get(
        "https://search.bilibili.com/all",
        params=query,
        headers=headers,
        timeout=30,  # without a timeout requests may wait forever
    ).text

    link = find_first_appearance_with_neighborhood(page, pattern)
    if link is None:
        raise ValueError(f"No bilibili video found for keyword: {keyword!r}")
    # The matched link is protocol-relative ("//www.bilibili.com/...").
    return "https:" + link
36
+
37
def get_response(html_url, timeout=60):
    """Fetch ``html_url`` with a bilibili referer and a browser User-Agent.

    Args:
        html_url: URL to request.
        timeout: Seconds to wait for the server before giving up.  Without a
            timeout, ``requests`` may block indefinitely.

    Returns:
        The ``requests.Response`` object.  The HTTP status is not checked.

    Raises:
        requests.RequestException: On connection errors or timeout.
    """
    # Named distinctly so it does not shadow the module-level ``headers``.
    request_headers = {
        "referer": "https://www.bilibili.com/",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36",
    }
    return requests.get(html_url, headers=request_headers, timeout=timeout)
44
+
45
def get_video_info(html_url):
    """Return the first audio and video stream URLs of a bilibili video page.

    The page is expected to embed its stream metadata as JSON inside a
    ``<script>window.__playinfo__=...</script>`` tag.  That JSON is parsed
    and the first entries of ``data.dash.audio`` and ``data.dash.video`` are
    read.

    Args:
        html_url: URL of the video page.

    Returns:
        A tuple ``(audio_url, video_url)``.  The audio URL is the first
        ``backupUrl`` entry when one exists, otherwise ``baseUrl``.

    Raises:
        ValueError: If the page does not contain the play-info script.
        KeyError, IndexError: If the JSON lacks the expected DASH entries.
    """
    response = get_response(html_url)
    found = re.search('<script>window.__playinfo__=(.*?)</script>', response.text)
    if found is None:
        raise ValueError(f"No play info found on page: {html_url!r}")
    dash = json.loads(found.group(1))['data']['dash']

    first_audio = dash['audio'][0]
    # ``backupUrl`` may be missing, null or an empty list; fall back to
    # ``baseUrl`` in all of those cases instead of failing on ``[0]``.
    backup_urls = first_audio.get('backupUrl')
    audio_url = backup_urls[0] if backup_urls else first_audio['baseUrl']
    video_url = dash['video'][0]['baseUrl']
    return audio_url, video_url
55
+
56
def save_audio(title, html_url):
    """Download the audio stream of a bilibili video to ``<title>.mp3``.

    The audio URL is resolved with ``get_video_info`` and fetched with
    ``get_response``; the received bytes are written unchanged.  Only the
    audio stream is saved; the video URL is ignored.  A completion message
    is printed afterwards.
    """
    stream_urls = get_video_info(html_url)
    payload = get_response(stream_urls[0]).content

    out_path = title + '.mp3'
    with open(out_path, mode='wb') as out_file:
        out_file.write(payload)
    print("音乐内容保存完成")
69
+
70
+ from uvr5.vr import AudioPre
71
# Directory that is scanned for UVR5 model weight files.
weight_uvr5_root = "uvr5/uvr_model"

# Model names: every directory entry that ends in ".pth" or contains "onnx",
# with the text ".pth" removed.
uvr5_names = [
    entry.replace(".pth", "")
    for entry in os.listdir(weight_uvr5_root)
    if entry.endswith(".pth") or "onnx" in entry
]

func = AudioPre

# Two separators are loaded once at import time, one per weight file, both
# with agg=10 on the selected device.
# NOTE(review): is_half=True is passed even when ``device`` is "cpu" --
# confirm that AudioPre supports half precision on CPU.
pre_fun_hp2, pre_fun_hp5 = (
    func(
        agg=10,
        model_path=os.path.join(weight_uvr5_root, weights_file),
        device=device,
        is_half=True,
    )
    for weights_file in ("UVR-HP2.pth", "UVR-HP5.pth")
)
92
+
93
+ import webrtcvad
94
+ from pydub import AudioSegment
95
+ from pydub.utils import make_chunks
96
+
97
+ import os
98
+ import librosa
99
+ import soundfile
100
+ import gradio as gr
101
+
102
+
103
def vad(audio_name):
    """Keep only the voiced parts of a WAV file and save them.

    The audio is converted to 48 kHz, mono, 16-bit PCM, cut into 30 ms
    frames, and every frame that WebRTC VAD classifies as speech is kept in
    order.  The result is written to ``voiced_audio.wav`` in the current
    working directory (an empty clip if no frame is voiced).

    Args:
        audio_name: Path of the input WAV file.

    Returns:
        None.
    """
    audio = AudioSegment.from_file(audio_name, format="wav")
    # WebRTC VAD supports only 8000, 16000, 32000 or 48000 Hz, mono,
    # 16-bit PCM input, so all three properties are enforced here.
    audio = audio.set_frame_rate(48000).set_channels(1).set_sample_width(2)

    # Aggressiveness mode is an integer between 0 and 3; 3 is the most
    # aggressive.  (Named ``detector`` so it does not shadow this function.)
    detector = webrtcvad.Vad()
    detector.set_mode(3)

    frame_duration = 30  # duration of a frame in ms
    # Size of one complete frame in bytes (samples per frame * bytes/sample).
    frame_bytes = int(audio.frame_rate * frame_duration / 1000) * audio.frame_width

    voiced_chunks = []
    for frame in make_chunks(audio, frame_duration):
        if len(frame.raw_data) < frame_bytes:
            # Only the final chunk can be short; partial frames are skipped.
            break
        if detector.is_speech(frame.raw_data, audio.frame_rate):
            voiced_chunks.append(frame.raw_data)

    # Join the raw PCM once instead of summing AudioSegments pairwise, which
    # re-copies the accumulated audio for every frame (quadratic time).
    voiced_audio = AudioSegment(
        data=b"".join(voiced_chunks),
        sample_width=audio.sample_width,
        frame_rate=audio.frame_rate,
        channels=audio.channels,
    )
    voiced_audio.export("voiced_audio.wav", format="wav")
133
+
134
+
135
+
136
+
137
def youtube_downloader(
    video_identifier,
    filename,
    split_model,
    start_time
):
    """Download a video's audio, cut a 45 s excerpt and run the separator.

    Despite the name, the audio URL is resolved with ``get_video_info`` and
    fetched with ``get_response`` (the bilibili helpers in this file).

    Args:
        video_identifier: URL of the video page.
        filename: Base name for the temporary ``<filename>.wav`` file and the
            output folder; surrounding whitespace is ignored.
        split_model: ``"UVR-HP2"`` selects ``pre_fun_hp2``; any other value
            selects ``pre_fun_hp5``.  Also used as the output sub-folder.
        start_time: Excerpt start in seconds; ``None`` is treated as 0.

    Returns:
        A tuple ``(vocal_path, instrument_path)`` under
        ``./output/<split_model>/<filename>/``.  The file names are assumed
        to match what ``_path_audio_`` writes -- verify against AudioPre.
    """
    # Strip once so the temp file, output folder and returned paths agree.
    filename = filename.strip()

    print(video_identifier)
    audio_url = get_video_info(video_identifier)[0]
    print(audio_url)
    audio_content = get_response(audio_url).content

    audio_path = filename + ".wav"
    out_dir = f"./output/{split_model}/{filename}/"
    pre_fun = pre_fun_hp2 if split_model == "UVR-HP2" else pre_fun_hp5

    start_ms = (start_time or 0) * 1000
    end_ms = start_ms + 45000

    os.makedirs("output", exist_ok=True)
    try:
        with open(audio_path, mode="wb") as f:
            f.write(audio_content)

        # pydub clamps slice bounds to the clip length, so a single slice
        # covers both "clip longer than end_ms" and "clip ends earlier".
        segment = AudioSegment.from_file(audio_path)[start_ms:end_ms]
        segment.export(audio_path, format="wav")
        pre_fun._path_audio_(audio_path, out_dir, out_dir, "wav")
    finally:
        # Remove the temporary download even when a step above fails.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    return f"./output/{split_model}/{filename}/vocal_{filename}.wav_10.wav", f"./output/{split_model}/{filename}/instrument_{filename}.wav_10.wav"
176
+
177
+
178
def youtube_downloader_100s(
    video_identifier,
    filename,
    split_model
):
    """Download a video's audio, trim long clips and run the separator.

    Despite the name, the audio URL is resolved with ``get_video_info`` and
    fetched with ``get_response`` (the bilibili helpers in this file).
    Clips longer than 120 s are reduced to the 110 s window from 10 s to
    120 s; shorter clips are processed exactly as downloaded.

    Args:
        video_identifier: URL of the video page.
        filename: Base name for the temporary ``<filename>.wav`` file and the
            output folder; surrounding whitespace is ignored.
        split_model: ``"UVR-HP2"`` selects ``pre_fun_hp2``; any other value
            selects ``pre_fun_hp5``.  Also used as the output sub-folder.

    Returns:
        A tuple ``(vocal_path, instrument_path)`` under
        ``./output/<split_model>/<filename>/``.  The file names are assumed
        to match what ``_path_audio_`` writes -- verify against AudioPre.
    """
    # Strip once so the temp file, output folder and returned paths agree.
    filename = filename.strip()

    print(video_identifier)
    audio_url = get_video_info(video_identifier)[0]
    print(audio_url)
    audio_content = get_response(audio_url).content

    audio_path = filename + ".wav"
    out_dir = f"./output/{split_model}/{filename}/"
    pre_fun = pre_fun_hp2 if split_model == "UVR-HP2" else pre_fun_hp5

    os.makedirs("output", exist_ok=True)
    try:
        with open(audio_path, mode="wb") as f:
            f.write(audio_content)

        audio_orig = AudioSegment.from_file(audio_path)
        if len(audio_orig) > 120000:
            # Skip the first 10 s and keep the following 110 s.
            start_ms = 10000
            end_ms = start_ms + 110000
            audio_orig[start_ms:end_ms].export(audio_path, format="wav")

        pre_fun._path_audio_(audio_path, out_dir, out_dir, "wav")
    finally:
        # Remove the temporary download even when a step above fails.
        if os.path.exists(audio_path):
            os.remove(audio_path)

    return f"./output/{split_model}/{filename}/vocal_{filename}.wav_10.wav", f"./output/{split_model}/{filename}/instrument_{filename}.wav_10.wav"
215
+
216
+
217
def convert(start_time, song_name_src, song_name_ref, check_song, key_shift, vocal_vol, inst_vol):
    """Produce an AI cover of the source song in the reference voice.

    Both songs are looked up on bilibili and separated with the
    ``"UVR-HP5"`` model.  The reference vocals are reduced to voiced frames
    with ``vad``, ``inference.py`` is run on the source vocals, and its
    output is mixed with the source instrumental.

    Args:
        start_time: Start of the source excerpt in seconds.
        song_name_src: Name or BV id of the song to be covered.
        song_name_ref: Name or BV id of a song carrying the target voice.
        check_song: Falsy adds ``--speech_enroll`` to the inference command.
        key_shift: Value passed to ``inference.py`` as ``--key_shift``.
        vocal_vol: Gain in dB applied to the converted vocals.
        inst_vol: Gain in dB applied to the instrumental.

    Returns:
        Path of the mixed WAV file, ``"<song_name_src>-AI翻唱.wav"``.

    Raises:
        RuntimeError: If ``inference.py`` does not produce its output file.
    """
    import subprocess  # local import: only this function needs it

    split_model = "UVR-HP5"
    song_name_ref = song_name_ref.strip().replace(" ", "")
    video_identifier = search_bilibili(song_name_ref)
    song_id = get_bilibili_video_id(video_identifier)

    song_name_src = song_name_src.strip().replace(" ", "")
    video_identifier_src = search_bilibili(song_name_src)
    song_id_src = get_bilibili_video_id(video_identifier_src)

    # Reuse previously separated reference vocals when the file is present.
    # Checking the file (not just its folder) lets a failed earlier run be
    # repeated instead of crashing on a missing file.
    ref_vocal_path = f"./output/{split_model}/{song_id}/vocal_{song_id}.wav_10.wav"
    if not os.path.isfile(ref_vocal_path):
        ref_vocal_path = youtube_downloader_100s(video_identifier, song_id, split_model)[0]
    audio, sr = librosa.load(ref_vocal_path, sr=24000, mono=True)
    soundfile.write("audio_ref.wav", audio, sr)

    # Writes the voiced frames of the reference to "voiced_audio.wav".
    vad("audio_ref.wav")

    # The source excerpt depends on start_time, so it is always regenerated.
    src_vocal_path = youtube_downloader(video_identifier_src, song_id_src, split_model, start_time)[0]
    audio_src, sr_src = librosa.load(src_vocal_path, sr=24000, mono=True)
    soundfile.write("audio_src.wav", audio_src, sr_src)

    # Remove a stale result so a failed run cannot return the previous song.
    svc_output = "output_svc/NeuCoSVCv2.wav"
    if os.path.isfile(svc_output):
        os.remove(svc_output)

    # An argument list (no shell) keeps request-supplied values such as
    # key_shift from being interpreted as shell syntax.
    command = [
        "python", "inference.py",
        "--src_wav_path", "audio_src.wav",
        "--ref_wav_path", "voiced_audio.wav",
        "--key_shift", str(key_shift),
    ]
    if not check_song:
        command.append("--speech_enroll")
    result = subprocess.run(command)
    if not os.path.isfile(svc_output):
        raise RuntimeError(
            f"inference.py produced no output (exit code {result.returncode})"
        )

    audio_vocal = AudioSegment.from_file(svc_output, format="wav")
    audio_inst = AudioSegment.from_file(f"output/{split_model}/{song_id_src}/instrument_{song_id_src}.wav_10.wav", format="wav")

    # ``AudioSegment + n`` applies a gain of n dB.
    audio_vocal = audio_vocal + vocal_vol
    audio_inst = audio_inst + inst_vol

    # Mix the instrumental under the vocals (overlay, not concatenation).
    combined_audio = audio_vocal.overlay(audio_inst)

    combined_audio.export(f"{song_name_src}-AI翻唱.wav", format="wav")

    return f"{song_name_src}-AI翻唱.wav"
265
+
266
+
267
+
268
# Gradio front end: the form on the left collects the inputs for convert(),
# the audio player on the right receives the file path it returns.
with gr.Blocks() as app:
    gr.Markdown("# <center>🥳💕🎶 NeuCoSVC v2 AI歌手全明星,无需训练、一键翻唱、重磅更新!</center>")
    gr.Markdown("## <center>🌟 只需 1 个歌曲名,一键翻唱任意歌手的任意歌曲,支持说话语音翻唱,随时随地,听你想听!</center>")
    gr.Markdown("### <center>🌊 NeuCoSVC v2 先享版 Powered by Tencent ARC Lab & Tsinghua University 💕</center>")
    with gr.Row():
        with gr.Column():
            # Source song (inp1) and reference-voice song (inp2).
            with gr.Row():
                inp1 = gr.Textbox(label="请填写想要AI翻唱的歌曲或BV号", info="直接填写BV号的得到的歌曲最匹配,也可以选择填写“歌曲名+歌手名”")
                inp2 = gr.Textbox(label="请填写含有目标音色的歌曲或BV号", info="例如您希望使用AI周杰伦的音色,就在此处填写周杰伦的任意一首歌")
            # Start time, singing/speech switch and key shift.
            with gr.Row():
                inp0 = gr.Number(value=0, label="起始时间 (秒)", info="此程序将自动从起始时间开始提取45秒的翻唱歌曲")
                inp3 = gr.Checkbox(label="参考音频是否为歌曲演唱,默认为是", info="如果参考音频为正常说话语音,请取消打勾", value=True)
                inp4 = gr.Slider(minimum=-12, maximum=12, value=0, step=1, label="歌曲人声升降调", info="默认为0,+2为升高2个key,以此类推")
            # Volume adjustments for the vocals and the instrumental.
            with gr.Row():
                inp5 = gr.Slider(minimum=-3, maximum=3, value=0, step=1, label="调节人声音量,默认为0")
                inp6 = gr.Slider(minimum=-3, maximum=3, value=0, step=1, label="调节伴奏音量,默认为0")
            btn = gr.Button("一键开启AI翻唱之旅吧💕", variant="primary")
        with gr.Column():
            out = gr.Audio(label="AI歌手为您倾情演唱的歌曲", type="filepath", interactive=True)

    # The input order matches convert()'s parameter order: start_time,
    # song_name_src, song_name_ref, check_song, key_shift, vocal_vol, inst_vol.
    btn.click(convert, [inp0, inp1, inp2, inp3, inp4, inp5, inp6], out)

    gr.Markdown("### <center>注意❗:请不要生成会对个人以及组织造成侵害的内容,此程序仅供科研、学习及个人娱乐使用。</center>")
    gr.HTML('''
        <div class="footer">
                    <p>🌊🏞️🎶 - 江水东流急,滔滔无尽声。 明·顾璘
                    </p>
        </div>
    ''')

# Enable request queuing and start the server with a public share link;
# show_error surfaces exceptions raised in convert() in the browser.
app.queue().launch(share=True, show_error=True)