Spaces:

None1145
/

So-VITS-SVC

Runtime error

App Files Files Community

None1145 committed on Nov 17, 2024

Commit

8ab304b

verified ·

1 Parent(s): 96ba2ba

Delete configs_template

Browse files

Files changed (3) hide show

configs_template/config_template.json +0 -79
configs_template/config_tiny_template.json +0 -79
configs_template/diffusion_template.yaml +0 -51

configs_template/config_template.json DELETED Viewed

@@ -1,79 +0,0 @@
-{
-  "train": {
-    "log_interval": 200,
-    "eval_interval": 800,
-    "seed": 1234,
-    "epochs": 10000,
-    "learning_rate": 0.0001,
-    "betas": [
-      0.8,
-      0.99
-    ],
-    "eps": 1e-09,
-    "batch_size": 6,
-    "fp16_run": false,
-    "half_type": "fp16",
-    "lr_decay": 0.999875,
-    "segment_size": 10240,
-    "init_lr_ratio": 1,
-    "warmup_epochs": 0,
-    "c_mel": 45,
-    "c_kl": 1.0,
-    "use_sr": true,
-    "max_speclen": 512,
-    "port": "8001",
-    "keep_ckpts": 3,
-    "all_in_mem": false,
-    "vol_aug":false
-  },
-  "data": {
-    "training_files": "filelists/train.txt",
-    "validation_files": "filelists/val.txt",
-    "max_wav_value": 32768.0,
-    "sampling_rate": 44100,
-    "filter_length": 2048,
-    "hop_length": 512,
-    "win_length": 2048,
-    "n_mel_channels": 80,
-    "mel_fmin": 0.0,
-    "mel_fmax": 22050,
-    "unit_interpolate_mode":"nearest"
-  },
-  "model": {
-    "inter_channels": 192,
-    "hidden_channels": 192,
-    "filter_channels": 768,
-    "n_heads": 2,
-    "n_layers": 6,
-    "kernel_size": 3,
-    "p_dropout": 0.1,
-    "resblock": "1",
-    "resblock_kernel_sizes": [3,7,11],
-    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
-    "upsample_rates": [ 8, 8, 2, 2, 2],
-    "upsample_initial_channel": 512,
-    "upsample_kernel_sizes": [16,16, 4, 4, 4],
-    "n_layers_q": 3,
-    "n_layers_trans_flow": 3,
-    "n_flow_layer": 4,
-    "use_spectral_norm": false,
-    "gin_channels": 768,
-    "ssl_dim": 768,
-    "n_speakers": 200,
-    "vocoder_name":"nsf-hifigan",
-    "speech_encoder":"vec768l12",
-    "speaker_embedding":false,
-    "vol_embedding":false,
-    "use_depthwise_conv":false,
-    "flow_share_parameter": false,
-    "use_automatic_f0_prediction": true,
-    "use_transformer_flow": false
-  },
-  "spk": {
-    "nyaru": 0,
-    "huiyu": 1,
-    "nen": 2,
-    "paimon": 3,
-    "yunhao": 4
-  }
-}

configs_template/config_tiny_template.json DELETED Viewed

@@ -1,79 +0,0 @@
-{
-  "train": {
-    "log_interval": 200,
-    "eval_interval": 800,
-    "seed": 1234,
-    "epochs": 10000,
-    "learning_rate": 0.0001,
-    "betas": [
-      0.8,
-      0.99
-    ],
-    "eps": 1e-09,
-    "batch_size": 6,
-    "fp16_run": false,
-    "half_type": "fp16",
-    "lr_decay": 0.999875,
-    "segment_size": 10240,
-    "init_lr_ratio": 1,
-    "warmup_epochs": 0,
-    "c_mel": 45,
-    "c_kl": 1.0,
-    "use_sr": true,
-    "max_speclen": 512,
-    "port": "8001",
-    "keep_ckpts": 3,
-    "all_in_mem": false,
-    "vol_aug":false
-  },
-  "data": {
-    "training_files": "filelists/train.txt",
-    "validation_files": "filelists/val.txt",
-    "max_wav_value": 32768.0,
-    "sampling_rate": 44100,
-    "filter_length": 2048,
-    "hop_length": 512,
-    "win_length": 2048,
-    "n_mel_channels": 80,
-    "mel_fmin": 0.0,
-    "mel_fmax": 22050,
-    "unit_interpolate_mode":"nearest"
-  },
-  "model": {
-    "inter_channels": 192,
-    "hidden_channels": 192,
-    "filter_channels": 512,
-    "n_heads": 2,
-    "n_layers": 6,
-    "kernel_size": 3,
-    "p_dropout": 0.1,
-    "resblock": "1",
-    "resblock_kernel_sizes": [3,7,11],
-    "resblock_dilation_sizes": [[1,3,5], [1,3,5], [1,3,5]],
-    "upsample_rates": [ 8, 8, 2, 2, 2],
-    "upsample_initial_channel": 400,
-    "upsample_kernel_sizes": [16,16, 4, 4, 4],
-    "n_layers_q": 3,
-    "n_layers_trans_flow": 3,
-    "n_flow_layer": 4,
-    "use_spectral_norm": false,
-    "gin_channels": 768,
-    "ssl_dim": 768,
-    "n_speakers": 200,
-    "vocoder_name":"nsf-hifigan",
-    "speech_encoder":"vec768l12",
-    "speaker_embedding":false,
-    "vol_embedding":false,
-    "use_depthwise_conv":true,
-    "flow_share_parameter": true,
-    "use_automatic_f0_prediction": true,
-    "use_transformer_flow": false
-  },
-  "spk": {
-    "nyaru": 0,
-    "huiyu": 1,
-    "nen": 2,
-    "paimon": 3,
-    "yunhao": 4
-  }
-}

configs_template/diffusion_template.yaml DELETED Viewed

@@ -1,51 +0,0 @@
-data:
-  sampling_rate: 44100
-  block_size: 512 # Equal to hop_length
-  duration: 2 # Audio duration during training, must be less than the duration of the shortest audio clip
-  encoder: 'vec768l12' # 'hubertsoft', 'vec256l9', 'vec768l12'
-  cnhubertsoft_gate: 10
-  encoder_sample_rate: 16000
-  encoder_hop_size: 320
-  encoder_out_channels: 768 # 256 if using 'hubertsoft'
-  training_files: "filelists/train.txt"
-  validation_files: "filelists/val.txt"
-  extensions: # List of file extensions included in the data collection
-    - wav
-  unit_interpolate_mode: "nearest"
-model:
-  type: 'Diffusion'
-  n_layers: 20
-  n_chans: 512
-  n_hidden: 256
-  use_pitch_aug: true
-  timesteps : 1000
-  k_step_max: 0 # Must be <= timesteps; if it is 0, train all timesteps
-  n_spk: 1 # max number of different speakers
-device: cuda
-vocoder:
-  type: 'nsf-hifigan'
-  ckpt: 'pretrain/nsf_hifigan/model'
-infer:
-  speedup: 10
-  method: 'dpm-solver++' # 'pndm' or 'dpm-solver' or 'ddim' or 'unipc' or 'dpm-solver++'
-env:
-  expdir: logs/44k/diffusion
-  gpu_id: 0
-train:
-  num_workers: 4 # If your CPU and GPU are both very strong, setting this to 0 may be faster
-  amp_dtype: fp32 # fp32, fp16 or bf16 (fp16 or bf16 may be faster if supported by your GPU)
-  batch_size: 48
-  cache_all_data: true # Setting this to false saves system memory or GPU memory, but may be slower
-  cache_device: 'cpu' # Set to 'cuda' to cache the data in GPU memory; fastest on a strong GPU
-  cache_fp16: true
-  epochs: 100000
-  interval_log: 10
-  interval_val: 2000
-  interval_force_save: 5000
-  lr: 0.0001
-  decay_step: 100000
-  gamma: 0.5
-  weight_decay: 0
-  save_opt: false
-spk:
-  'nyaru': 0