Spaces:

tencent
/

SongGeneration

Running on L40S

App Files Files Community

hainazhu commited on Jun 7

Commit

2644f3e

1 Parent(s): ceb0b97

Add infer code

Browse files

Files changed (5) hide show

.gitignore +3 -1
app.py +2 -0
ckpt/.gitkeep +0 -0
levo_inference.py +126 -0
third_party/.gitkeep +0 -0

.gitignore CHANGED Viewed

@@ -1,3 +1,5 @@
 launchs/
 **/__pycache__
-sample/generated/

 launchs/
 **/__pycache__
+sample/generated/
+.bash_history
+.config

app.py CHANGED Viewed

@@ -6,6 +6,8 @@ from datetime import datetime
 import os
 import sys
 import librosa
 EXAMPLE_DESC = """female, dark, pop, sad, piano and drums, the bpm is 125."""
 EXAMPLE_LYRICS = """

 import os
 import sys
 import librosa
+import os.path as op
+PROJ_DIR = os.path.dirname(os.path.abspath(__file__))
 EXAMPLE_DESC = """female, dark, pop, sad, piano and drums, the bpm is 125."""
 EXAMPLE_LYRICS = """

ckpt/.gitkeep ADDED Viewed

File without changes

levo_inference.py ADDED Viewed

	@@ -0,0 +1,126 @@

+import os
+import sys
+sys.path.append('./codeclm/tokenizer')
+sys.path.append('./codeclm/tokenizer/Flow1dVAE')
+sys.path.append('.')
+import torch
+import json
+from omegaconf import OmegaConf
+from codeclm.trainer.codec_song_pl import CodecLM_PL
+from codeclm.models import CodecLM
+from separator import Separator
+class LeVoInference(torch.nn.Module):
+    def __init__(self, cfg_path):
+        super().__init__()
+        torch.backends.cudnn.enabled = False
+        OmegaConf.register_new_resolver("eval", lambda x: eval(x))
+        OmegaConf.register_new_resolver("concat", lambda *x: [xxx for xx in x for xxx in xx])
+        OmegaConf.register_new_resolver("load_yaml", lambda x: list(OmegaConf.load(x)))
+        self.cfg = OmegaConf.load(cfg_path)
+        self.max_duration = self.cfg.max_dur
+        # Define model or load pretrained model
+        model_light = CodecLM_PL(self.cfg)
+        model_light = model_light.eval().cuda()
+        model_light.audiolm.cfg = self.cfg
+        self.model_lm = model_light.audiolm
+        self.model_audio_tokenizer = model_light.audio_tokenizer
+        self.model_seperate_tokenizer = model_light.seperate_tokenizer
+        self.model = CodecLM(name = "tmp",
+            lm = self.model_lm,
+            audiotokenizer = self.model_audio_tokenizer,
+            max_duration = self.max_duration,
+            seperate_tokenizer = self.model_seperate_tokenizer,
+        )
+        self.separator = Separator()
+        self.default_params = dict(
+            cfg_coef = 1.5,
+            temperature = 1.0,
+            top_k = 50,
+            top_p = 0.0,
+            record_tokens = True,
+            record_window = 50,
+            extend_stride = 5,
+            duration = self.max_duration,
+        )
+        self.model.set_generation_params(**self.default_params)
+    def forward(self, lyric: str, description: str, prompt_audio_path: os.PathLike = None, params = dict()):
+        params = {**self.default_params, **params}
+        self.model.set_generation_params(**params)
+        if prompt_audio_path is None:
+            pmt_wav, vocal_wav, bgm_wav = None, None, None
+        else:
+            pmt_wav, vocal_wav, bgm_wav = self.separator.run(prompt_audio_path)
+        generate_inp = {
+            'lyrics': [lyric.replace("  ", " ")],
+            'descriptions': [description],
+            'melody_wavs': pmt_wav,
+            'vocal_wavs': vocal_wav,
+            'bgm_wavs': bgm_wav,
+        }
+        with torch.autocast(device_type="cuda", dtype=torch.float16):
+            tokens = self.model.generate(**generate_inp, return_tokens=True)
+        if tokens.shape[-1] > 3000:
+            tokens = tokens[..., :3000]
+        with torch.no_grad():
+            wav_seperate = self.model.generate_audio(tokens, pmt_wav, vocal_wav, bgm_wav)
+        return wav_seperate[0]
+def build_levo_inference():
+    cfg_path = './conf/infer.yaml'
+    return LeVoInference(cfg_path)
+if __name__ == '__main__':
+    import sys
+    import os
+    import time
+    import json
+    import torchaudio
+    cfg_path = sys.argv[1]
+    save_dir = sys.argv[2]
+    input_jsonl = sys.argv[3]
+    model = LeVoInference(cfg_path)
+    os.makedirs(save_dir + "/audios", exist_ok=True)
+    with open(input_jsonl, "r") as fp:
+        lines = fp.readlines()
+    for line in lines:
+        item = json.loads(line)
+        target_wav_name = f"{save_dir}/audios/{item['idx']}.flac"
+        descriptions = item["descriptions"]
+        lyric = item["gt_lyric"]
+        prompt_audio_path = item['prompt_audio_path']
+        wav = model(lyric, descriptions, prompt_audio_path)
+        torchaudio.save(target_wav_name, wav.cpu().float(), model.cfg.sample_rate)

third_party/.gitkeep ADDED Viewed

File without changes