Advanced-RVC-Inference

Sleeping

App Files Community

smjain committed on Feb 28, 2024

Commit

ef5c9b8

verified ·

1 Parent(s): f8726c0

Upload 5 files

Browse files

Files changed (5) hide show

lib/data_utils.py +7 -12
lib/losses.py +1 -0
lib/mel_processing.py +4 -6
lib/process_ckpt.py +113 -126
lib/utils.py +33 -40

lib/data_utils.py CHANGED Viewed

@@ -1,15 +1,10 @@
-import os
-import traceback
-import logging
-logger = logging.getLogger(__name__)
 import numpy as np
 import torch
 import torch.utils.data
-from infer.lib.train.mel_processing import spectrogram_torch
-from infer.lib.train.utils import load_filepaths_and_text, load_wav_to_torch
 class TextAudioLoaderMultiNSFsid(torch.utils.data.Dataset):
@@ -43,7 +38,7 @@ class TextAudioLoaderMultiNSFsid(torch.utils.data.Dataset):
         for audiopath, text, pitch, pitchf, dv in self.audiopaths_and_text:
             if self.min_text_len <= len(text) and len(text) <= self.max_text_len:
                 audiopaths_and_text_new.append([audiopath, text, pitch, pitchf, dv])
-                lengths.append(os.path.getsize(audiopath) // (3 * self.hop_length))
         self.audiopaths_and_text = audiopaths_and_text_new
         self.lengths = lengths
@@ -113,7 +108,7 @@ class TextAudioLoaderMultiNSFsid(torch.utils.data.Dataset):
             try:
                 spec = torch.load(spec_filename)
             except:
-                logger.warning("%s %s", spec_filename, traceback.format_exc())
                 spec = spectrogram_torch(
                     audio_norm,
                     self.filter_length,
@@ -251,7 +246,7 @@ class TextAudioLoader(torch.utils.data.Dataset):
         for audiopath, text, dv in self.audiopaths_and_text:
             if self.min_text_len <= len(text) and len(text) <= self.max_text_len:
                 audiopaths_and_text_new.append([audiopath, text, dv])
-                lengths.append(os.path.getsize(audiopath) // (3 * self.hop_length))
         self.audiopaths_and_text = audiopaths_and_text_new
         self.lengths = lengths
@@ -305,7 +300,7 @@ class TextAudioLoader(torch.utils.data.Dataset):
             try:
                 spec = torch.load(spec_filename)
             except:
-                logger.warning("%s %s", spec_filename, traceback.format_exc())
                 spec = spectrogram_torch(
                     audio_norm,
                     self.filter_length,

+import os, traceback
 import numpy as np
 import torch
 import torch.utils.data
+from mel_processing import spectrogram_torch
+from utils import load_wav_to_torch, load_filepaths_and_text
 class TextAudioLoaderMultiNSFsid(torch.utils.data.Dataset):
         for audiopath, text, pitch, pitchf, dv in self.audiopaths_and_text:
             if self.min_text_len <= len(text) and len(text) <= self.max_text_len:
                 audiopaths_and_text_new.append([audiopath, text, pitch, pitchf, dv])
+                lengths.append(os.path.getsize(audiopath) // (2 * self.hop_length))
         self.audiopaths_and_text = audiopaths_and_text_new
         self.lengths = lengths
             try:
                 spec = torch.load(spec_filename)
             except:
+                print(spec_filename, traceback.format_exc())
                 spec = spectrogram_torch(
                     audio_norm,
                     self.filter_length,
         for audiopath, text, dv in self.audiopaths_and_text:
             if self.min_text_len <= len(text) and len(text) <= self.max_text_len:
                 audiopaths_and_text_new.append([audiopath, text, dv])
+                lengths.append(os.path.getsize(audiopath) // (2 * self.hop_length))
         self.audiopaths_and_text = audiopaths_and_text_new
         self.lengths = lengths
             try:
                 spec = torch.load(spec_filename)
             except:
+                print(spec_filename, traceback.format_exc())
                 spec = spectrogram_torch(
                     audio_norm,
                     self.filter_length,

lib/losses.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import torch
 def feature_loss(fmap_r, fmap_g):

 import torch
+from torch.nn import functional as F
 def feature_loss(fmap_r, fmap_g):

lib/mel_processing.py CHANGED Viewed

@@ -1,9 +1,7 @@
 import torch
 import torch.utils.data
 from librosa.filters import mel as librosa_mel_fn
-import logging
-logger = logging.getLogger(__name__)
 MAX_WAV_VALUE = 32768.0
@@ -53,10 +51,10 @@ def spectrogram_torch(y, n_fft, sampling_rate, hop_size, win_size, center=False)
         :: (B, Freq, Frame) - Linear-frequency Linear-amplitude spectrogram
     """
     # Validation
-    if torch.min(y) < -1.07:
-        logger.debug("min value is %s", str(torch.min(y)))
-    if torch.max(y) > 1.07:
-        logger.debug("max value is %s", str(torch.max(y)))
     # Window - Cache if needed
     global hann_window

 import torch
 import torch.utils.data
 from librosa.filters import mel as librosa_mel_fn
 MAX_WAV_VALUE = 32768.0
         :: (B, Freq, Frame) - Linear-frequency Linear-amplitude spectrogram
     """
     # Validation
+    if torch.min(y) < -1.0:
+        print("min value is ", torch.min(y))
+    if torch.max(y) > 1.0:
+        print("max value is ", torch.max(y))
     # Window - Cache if needed
     global hann_window

lib/process_ckpt.py CHANGED Viewed

@@ -1,16 +1,8 @@
-import os
-import sys
-import traceback
 from collections import OrderedDict
-import torch
-from i18n.i18n import I18nAuto
-i18n = I18nAuto()
-def savee(ckpt, sr, if_f0, name, epoch, version, hps):
     try:
         opt = OrderedDict()
         opt["weight"] = {}
@@ -18,31 +10,73 @@ def savee(ckpt, sr, if_f0, name, epoch, version, hps):
             if "enc_q" in key:
                 continue
             opt["weight"][key] = ckpt[key].half()
-        opt["config"] = [
-            hps.data.filter_length // 2 + 1,
-            32,
-            hps.model.inter_channels,
-            hps.model.hidden_channels,
-            hps.model.filter_channels,
-            hps.model.n_heads,
-            hps.model.n_layers,
-            hps.model.kernel_size,
-            hps.model.p_dropout,
-            hps.model.resblock,
-            hps.model.resblock_kernel_sizes,
-            hps.model.resblock_dilation_sizes,
-            hps.model.upsample_rates,
-            hps.model.upsample_initial_channel,
-            hps.model.upsample_kernel_sizes,
-            hps.model.spk_embed_dim,
-            hps.model.gin_channels,
-            hps.data.sampling_rate,
-        ]
         opt["info"] = "%sepoch" % epoch
         opt["sr"] = sr
         opt["f0"] = if_f0
-        opt["version"] = version
-        torch.save(opt, "assets/weights/%s.pth" % name)
         return "Success."
     except:
         return traceback.format_exc()
@@ -51,17 +85,16 @@ def savee(ckpt, sr, if_f0, name, epoch, version, hps):
 def show_info(path):
     try:
         a = torch.load(path, map_location="cpu")
-        return "模型信息:%s\n采样率:%s\n模型是否输入音高引导:%s\n版本:%s" % (
             a.get("info", "None"),
             a.get("sr", "None"),
             a.get("f0", "None"),
-            a.get("version", "None"),
         )
     except:
         return traceback.format_exc()
-def extract_small_model(path, name, sr, if_f0, info, version):
     try:
         ckpt = torch.load(path, map_location="cpu")
         if "model" in ckpt:
@@ -94,98 +127,53 @@ def extract_small_model(path, name, sr, if_f0, info, version):
                 40000,
             ]
         elif sr == "48k":
-            if version == "v1":
-                opt["config"] = [
-                    1025,
-                    32,
-                    192,
-                    192,
-                    768,
-                    2,
-                    6,
-                    3,
-                    0,
-                    "1",
-                    [3, 7, 11],
-                    [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
-                    [10, 6, 2, 2, 2],
-                    512,
-                    [16, 16, 4, 4, 4],
-                    109,
-                    256,
-                    48000,
-                ]
-            else:
-                opt["config"] = [
-                    1025,
-                    32,
-                    192,
-                    192,
-                    768,
-                    2,
-                    6,
-                    3,
-                    0,
-                    "1",
-                    [3, 7, 11],
-                    [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
-                    [12, 10, 2, 2],
-                    512,
-                    [24, 20, 4, 4],
-                    109,
-                    256,
-                    48000,
-                ]
         elif sr == "32k":
-            if version == "v1":
-                opt["config"] = [
-                    513,
-                    32,
-                    192,
-                    192,
-                    768,
-                    2,
-                    6,
-                    3,
-                    0,
-                    "1",
-                    [3, 7, 11],
-                    [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
-                    [10, 4, 2, 2, 2],
-                    512,
-                    [16, 16, 4, 4, 4],
-                    109,
-                    256,
-                    32000,
-                ]
-            else:
-                opt["config"] = [
-                    513,
-                    32,
-                    192,
-                    192,
-                    768,
-                    2,
-                    6,
-                    3,
-                    0,
-                    "1",
-                    [3, 7, 11],
-                    [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
-                    [10, 8, 2, 2],
-                    512,
-                    [20, 16, 4, 4],
-                    109,
-                    256,
-                    32000,
-                ]
         if info == "":
             info = "Extracted model."
         opt["info"] = info
-        opt["version"] = version
         opt["sr"] = sr
         opt["f0"] = int(if_f0)
-        torch.save(opt, "assets/weights/%s.pth" % name)
         return "Success."
     except:
         return traceback.format_exc()
@@ -197,13 +185,13 @@ def change_info(path, info, name):
         ckpt["info"] = info
         if name == "":
             name = os.path.basename(path)
-        torch.save(ckpt, "assets/weights/%s" % name)
         return "Success."
     except:
         return traceback.format_exc()
-def merge(path1, path2, alpha1, sr, f0, info, name, version):
     try:
         def extract(ckpt):
@@ -252,10 +240,9 @@ def merge(path1, path2, alpha1, sr, f0, info, name, version):
         elif(sr=="32k"):opt["config"] = [513, 32, 192, 192, 768, 2, 6, 3, 0, "1", [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [10, 4, 2, 2, 2], 512, [16, 16, 4, 4,4], 109, 256, 32000]
         """
         opt["sr"] = sr
-        opt["f0"] = 1 if f0 == i18n("是") else 0
-        opt["version"] = version
         opt["info"] = info
-        torch.save(opt, "assets/weights/%s.pth" % name)
         return "Success."
     except:
         return traceback.format_exc()

+import torch, traceback, os, pdb
 from collections import OrderedDict
+def savee(ckpt, sr, if_f0, name, epoch):
     try:
         opt = OrderedDict()
         opt["weight"] = {}
             if "enc_q" in key:
                 continue
             opt["weight"][key] = ckpt[key].half()
+        if sr == "40k":
+            opt["config"] = [
+                1025,
+                32,
+                192,
+                192,
+                768,
+                2,
+                6,
+                3,
+                0,
+                "1",
+                [3, 7, 11],
+                [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+                [10, 10, 2, 2],
+                512,
+                [16, 16, 4, 4],
+                109,
+                256,
+                40000,
+            ]
+        elif sr == "48k":
+            opt["config"] = [
+                1025,
+                32,
+                192,
+                192,
+                768,
+                2,
+                6,
+                3,
+                0,
+                "1",
+                [3, 7, 11],
+                [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+                [10, 6, 2, 2, 2],
+                512,
+                [16, 16, 4, 4, 4],
+                109,
+                256,
+                48000,
+            ]
+        elif sr == "32k":
+            opt["config"] = [
+                513,
+                32,
+                192,
+                192,
+                768,
+                2,
+                6,
+                3,
+                0,
+                "1",
+                [3, 7, 11],
+                [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+                [10, 4, 2, 2, 2],
+                512,
+                [16, 16, 4, 4, 4],
+                109,
+                256,
+                32000,
+            ]
         opt["info"] = "%sepoch" % epoch
         opt["sr"] = sr
         opt["f0"] = if_f0
+        torch.save(opt, "weights/%s.pth" % name)
         return "Success."
     except:
         return traceback.format_exc()
 def show_info(path):
     try:
         a = torch.load(path, map_location="cpu")
+        return "模型信息:%s\n采样率:%s\n模型是否输入音高引导:%s" % (
             a.get("info", "None"),
             a.get("sr", "None"),
             a.get("f0", "None"),
         )
     except:
         return traceback.format_exc()
+def extract_small_model(path, name, sr, if_f0, info):
     try:
         ckpt = torch.load(path, map_location="cpu")
         if "model" in ckpt:
                 40000,
             ]
         elif sr == "48k":
+            opt["config"] = [
+                1025,
+                32,
+                192,
+                192,
+                768,
+                2,
+                6,
+                3,
+                0,
+                "1",
+                [3, 7, 11],
+                [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+                [10, 6, 2, 2, 2],
+                512,
+                [16, 16, 4, 4, 4],
+                109,
+                256,
+                48000,
+            ]
         elif sr == "32k":
+            opt["config"] = [
+                513,
+                32,
+                192,
+                192,
+                768,
+                2,
+                6,
+                3,
+                0,
+                "1",
+                [3, 7, 11],
+                [[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+                [10, 4, 2, 2, 2],
+                512,
+                [16, 16, 4, 4, 4],
+                109,
+                256,
+                32000,
+            ]
         if info == "":
             info = "Extracted model."
         opt["info"] = info
         opt["sr"] = sr
         opt["f0"] = int(if_f0)
+        torch.save(opt, "weights/%s.pth" % name)
         return "Success."
     except:
         return traceback.format_exc()
         ckpt["info"] = info
         if name == "":
             name = os.path.basename(path)
+        torch.save(ckpt, "weights/%s" % name)
         return "Success."
     except:
         return traceback.format_exc()
+def merge(path1, path2, alpha1, sr, f0, info, name):
     try:
         def extract(ckpt):
         elif(sr=="32k"):opt["config"] = [513, 32, 192, 192, 768, 2, 6, 3, 0, "1", [3, 7, 11], [[1, 3, 5], [1, 3, 5], [1, 3, 5]], [10, 4, 2, 2, 2], 512, [16, 16, 4, 4,4], 109, 256, 32000]
         """
         opt["sr"] = sr
+        opt["f0"] = 1 if f0 == "是" else 0
         opt["info"] = info
+        torch.save(opt, "weights/%s.pth" % name)
         return "Success."
     except:
         return traceback.format_exc()

lib/utils.py CHANGED Viewed

@@ -1,15 +1,13 @@
-import argparse
 import glob
-import json
 import logging
-import os
 import subprocess
-import sys
-import shutil
 import numpy as np
-import torch
 from scipy.io.wavfile import read
 MATPLOTLIB_FLAG = False
@@ -33,25 +31,22 @@ def load_checkpoint_d(checkpoint_path, combd, sbd, optimizer=None, load_opt=1):
             try:
                 new_state_dict[k] = saved_state_dict[k]
                 if saved_state_dict[k].shape != state_dict[k].shape:
-                    logger.warning(
-                        "shape-%s-mismatch. need: %s, get: %s",
-                        k,
-                        state_dict[k].shape,
-                        saved_state_dict[k].shape,
                     )  #
                     raise KeyError
             except:
                 # logger.info(traceback.format_exc())
-                logger.info("%s is not in the checkpoint", k)  # pretrain缺失的
                 new_state_dict[k] = v  # 模型自带的随机值
         if hasattr(model, "module"):
             model.module.load_state_dict(new_state_dict, strict=False)
         else:
             model.load_state_dict(new_state_dict, strict=False)
-        return model
     go(combd, "combd")
-    model = go(sbd, "sbd")
     #############
     logger.info("Loaded model weights")
@@ -111,16 +106,14 @@ def load_checkpoint(checkpoint_path, model, optimizer=None, load_opt=1):
         try:
             new_state_dict[k] = saved_state_dict[k]
             if saved_state_dict[k].shape != state_dict[k].shape:
-                logger.warning(
-                    "shape-%s-mismatch|need-%s|get-%s",
-                    k,
-                    state_dict[k].shape,
-                    saved_state_dict[k].shape,
                 )  #
                 raise KeyError
         except:
             # logger.info(traceback.format_exc())
-            logger.info("%s is not in the checkpoint", k)  # pretrain缺失的
             new_state_dict[k] = v  # 模型自带的随机值
     if hasattr(model, "module"):
         model.module.load_state_dict(new_state_dict, strict=False)
@@ -211,7 +204,7 @@ def latest_checkpoint_path(dir_path, regex="G_*.pth"):
     f_list = glob.glob(os.path.join(dir_path, regex))
     f_list.sort(key=lambda f: int("".join(filter(str.isdigit, f))))
     x = f_list[-1]
-    logger.debug(x)
     return x
@@ -291,8 +284,8 @@ def get_hparams(init=True):
         bs                                    done
         pretrainG、pretrainD                  done
         卡号：os.en["CUDA_VISIBLE_DEVICES"]   done
-        if_latest                             done
-      模型：if_f0                             done
       采样率：自动选择config                  done
       是否缓存数据集进GPU:if_cache_data_in_gpu done
@@ -301,6 +294,7 @@ def get_hparams(init=True):
       -c不要了
     """
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "-se",
         "--save_every_epoch",
@@ -327,16 +321,6 @@ def get_hparams(init=True):
     parser.add_argument(
         "-sr", "--sample_rate", type=str, required=True, help="sample rate, 32k/40k/48k"
     )
-    parser.add_argument(
-        "-sw",
-        "--save_every_weights",
-        type=str,
-        default="0",
-        help="save the extracted model in weights directory when saving checkpoints",
-    )
-    parser.add_argument(
-        "-v", "--version", type=str, required=True, help="model version"
-    )
     parser.add_argument(
         "-f0",
         "--if_f0",
@@ -363,9 +347,20 @@ def get_hparams(init=True):
     name = args.experiment_dir
     experiment_dir = os.path.join("./logs", args.experiment_dir)
     config_save_path = os.path.join(experiment_dir, "config.json")
-    with open(config_save_path, "r") as f:
-        config = json.load(f)
     hparams = HParams(**config)
     hparams.model_dir = hparams.experiment_dir = experiment_dir
@@ -374,13 +369,11 @@ def get_hparams(init=True):
     hparams.total_epoch = args.total_epoch
     hparams.pretrainG = args.pretrainG
     hparams.pretrainD = args.pretrainD
-    hparams.version = args.version
     hparams.gpus = args.gpus
     hparams.train.batch_size = args.batch_size
     hparams.sample_rate = args.sample_rate
     hparams.if_f0 = args.if_f0
     hparams.if_latest = args.if_latest
-    hparams.save_every_weights = args.save_every_weights
     hparams.if_cache_data_in_gpu = args.if_cache_data_in_gpu
     hparams.data.training_files = "%s/filelist.txt" % experiment_dir
     return hparams
@@ -409,7 +402,7 @@ def get_hparams_from_file(config_path):
 def check_git_hash(model_dir):
     source_dir = os.path.dirname(os.path.realpath(__file__))
     if not os.path.exists(os.path.join(source_dir, ".git")):
-        logger.warning(
             "{} is not a git repository, therefore hash value comparison will be ignored.".format(
                 source_dir
             )
@@ -422,7 +415,7 @@ def check_git_hash(model_dir):
     if os.path.exists(path):
         saved_hash = open(path).read()
         if saved_hash != cur_hash:
-            logger.warning(
                 "git hash values are different. {}(saved) != {}(current)".format(
                     saved_hash[:8], cur_hash[:8]
                 )

+import os, traceback
 import glob
+import sys
+import argparse
 import logging
+import json
 import subprocess
 import numpy as np
 from scipy.io.wavfile import read
+import torch
 MATPLOTLIB_FLAG = False
             try:
                 new_state_dict[k] = saved_state_dict[k]
                 if saved_state_dict[k].shape != state_dict[k].shape:
+                    print(
+                        "shape-%s-mismatch|need-%s|get-%s"
+                        % (k, state_dict[k].shape, saved_state_dict[k].shape)
                     )  #
                     raise KeyError
             except:
                 # logger.info(traceback.format_exc())
+                logger.info("%s is not in the checkpoint" % k)  # pretrain缺失的
                 new_state_dict[k] = v  # 模型自带的随机值
         if hasattr(model, "module"):
             model.module.load_state_dict(new_state_dict, strict=False)
         else:
             model.load_state_dict(new_state_dict, strict=False)
     go(combd, "combd")
+    go(sbd, "sbd")
     #############
     logger.info("Loaded model weights")
         try:
             new_state_dict[k] = saved_state_dict[k]
             if saved_state_dict[k].shape != state_dict[k].shape:
+                print(
+                    "shape-%s-mismatch|need-%s|get-%s"
+                    % (k, state_dict[k].shape, saved_state_dict[k].shape)
                 )  #
                 raise KeyError
         except:
             # logger.info(traceback.format_exc())
+            logger.info("%s is not in the checkpoint" % k)  # pretrain缺失的
             new_state_dict[k] = v  # 模型自带的随机值
     if hasattr(model, "module"):
         model.module.load_state_dict(new_state_dict, strict=False)
     f_list = glob.glob(os.path.join(dir_path, regex))
     f_list.sort(key=lambda f: int("".join(filter(str.isdigit, f))))
     x = f_list[-1]
+    print(x)
     return x
         bs                                    done
         pretrainG、pretrainD                  done
         卡号：os.en["CUDA_VISIBLE_DEVICES"]   done
+        if_latest                             todo
+      模型：if_f0                             todo
       采样率：自动选择config                  done
       是否缓存数据集进GPU:if_cache_data_in_gpu done
       -c不要了
     """
     parser = argparse.ArgumentParser()
+    # parser.add_argument('-c', '--config', type=str, default="configs/40k.json",help='JSON file for configuration')
     parser.add_argument(
         "-se",
         "--save_every_epoch",
     parser.add_argument(
         "-sr", "--sample_rate", type=str, required=True, help="sample rate, 32k/40k/48k"
     )
     parser.add_argument(
         "-f0",
         "--if_f0",
     name = args.experiment_dir
     experiment_dir = os.path.join("./logs", args.experiment_dir)
+    if not os.path.exists(experiment_dir):
+        os.makedirs(experiment_dir)
+    config_path = "configs/%s.json" % args.sample_rate
     config_save_path = os.path.join(experiment_dir, "config.json")
+    if init:
+        with open(config_path, "r") as f:
+            data = f.read()
+        with open(config_save_path, "w") as f:
+            f.write(data)
+    else:
+        with open(config_save_path, "r") as f:
+            data = f.read()
+    config = json.loads(data)
     hparams = HParams(**config)
     hparams.model_dir = hparams.experiment_dir = experiment_dir
     hparams.total_epoch = args.total_epoch
     hparams.pretrainG = args.pretrainG
     hparams.pretrainD = args.pretrainD
     hparams.gpus = args.gpus
     hparams.train.batch_size = args.batch_size
     hparams.sample_rate = args.sample_rate
     hparams.if_f0 = args.if_f0
     hparams.if_latest = args.if_latest
     hparams.if_cache_data_in_gpu = args.if_cache_data_in_gpu
     hparams.data.training_files = "%s/filelist.txt" % experiment_dir
     return hparams
 def check_git_hash(model_dir):
     source_dir = os.path.dirname(os.path.realpath(__file__))
     if not os.path.exists(os.path.join(source_dir, ".git")):
+        logger.warn(
             "{} is not a git repository, therefore hash value comparison will be ignored.".format(
                 source_dir
             )
     if os.path.exists(path):
         saved_hash = open(path).read()
         if saved_hash != cur_hash:
+            logger.warn(
                 "git hash values are different. {}(saved) != {}(current)".format(
                     saved_hash[:8], cur_hash[:8]
                 )