|
K_step: 1000 |
|
accumulate_grad_batches: 1 |
|
audio_num_mel_bins: 128 |
|
audio_sample_rate: 44100 |
|
binarization_args: |
|
  shuffle: false |
|
  with_align: true |
|
  with_f0: true |
|
  with_hubert: true |
|
  with_spk_embed: false |
|
  with_wav: false |
|
binarizer_cls: preprocessing.SVCpre.SVCBinarizer |
|
binary_data_dir: data/binary/Unnamed |
|
check_val_every_n_epoch: 10 |
|
choose_test_manually: false |
|
clip_grad_norm: 1 |
|
config_path: training/config_nsf.yaml |
|
content_cond_steps: [] |
|
cwt_add_f0_loss: false |
|
cwt_hidden_size: 128 |
|
cwt_layers: 2 |
|
cwt_loss: l1 |
|
cwt_std_scale: 0.8 |
|
datasets: |
|
- opencpop |
|
debug: false |
|
dec_ffn_kernel_size: 9 |
|
dec_layers: 4 |
|
decay_steps: 20000 |
|
decoder_type: fft |
|
dict_dir: '' |
|
diff_decoder_type: wavenet |
|
diff_loss_type: l2 |
|
dilation_cycle_length: 4 |
|
dropout: 0.1 |
|
ds_workers: 4 |
|
dur_enc_hidden_stride_kernel: |
|
- 0,2,3 |
|
- 0,2,3 |
|
- 0,1,3 |
|
dur_loss: mse |
|
dur_predictor_kernel: 3 |
|
dur_predictor_layers: 5 |
|
enc_ffn_kernel_size: 9 |
|
enc_layers: 4 |
|
encoder_K: 8 |
|
encoder_type: fft |
|
endless_ds: false |
|
f0_bin: 256 |
|
f0_max: 1100.0 |
|
f0_min: 40.0 |
|
ffn_act: gelu |
|
ffn_padding: SAME |
|
fft_size: 2048 |
|
fmax: 16000 |
|
fmin: 40 |
|
fs2_ckpt: '' |
|
gaussian_start: true |
|
gen_dir_name: '' |
|
gen_tgt_spk_id: -1 |
|
hidden_size: 256 |
|
hop_size: 512 |
|
hubert_gpu: true |
|
hubert_path: checkpoints/hubert/hubert_soft.pt |
|
infer: false |
|
keep_bins: 128 |
|
lambda_commit: 0.25 |
|
lambda_energy: 0.0 |
|
lambda_f0: 1.0 |
|
lambda_ph_dur: 0.3 |
|
lambda_sent_dur: 1.0 |
|
lambda_uv: 1.0 |
|
lambda_word_dur: 1.0 |
|
load_ckpt: pretrain/nehito_ckpt_steps_1000000.ckpt |
|
log_interval: 100 |
|
loud_norm: false |
|
lr: 5.0e-05 |
|
max_beta: 0.02 |
|
max_epochs: 3000 |
|
max_eval_sentences: 1 |
|
max_eval_tokens: 60000 |
|
max_frames: 42000 |
|
max_input_tokens: 60000 |
|
max_sentences: 12 |
|
max_tokens: 128000 |
|
max_updates: 1000000 |
|
mel_loss: ssim:0.5|l1:0.5 |
|
mel_vmax: 1.5 |
|
mel_vmin: -6.0 |
|
min_level_db: -120 |
|
no_fs2: true |
|
norm_type: gn |
|
num_ckpt_keep: 10 |
|
num_heads: 2 |
|
num_sanity_val_steps: 1 |
|
num_spk: 1 |
|
num_test_samples: 0 |
|
num_valid_plots: 10 |
|
optimizer_adam_beta1: 0.9 |
|
optimizer_adam_beta2: 0.98 |
|
out_wav_norm: false |
|
pe_ckpt: checkpoints/0102_xiaoma_pe/model_ckpt_steps_60000.ckpt |
|
pe_enable: false |
|
perform_enhance: true |
|
pitch_ar: false |
|
pitch_enc_hidden_stride_kernel: |
|
- 0,2,5 |
|
- 0,2,5 |
|
- 0,2,5 |
|
pitch_extractor: parselmouth |
|
pitch_loss: l2 |
|
pitch_norm: log |
|
pitch_type: frame |
|
pndm_speedup: 10 |
|
pre_align_args: |
|
  allow_no_txt: false |
|
  denoise: false |
|
  forced_align: mfa |
|
  txt_processor: zh_g2pM |
|
  use_sox: true |
|
  use_tone: false |
|
pre_align_cls: data_gen.singing.pre_align.SingingPreAlign |
|
predictor_dropout: 0.5 |
|
predictor_grad: 0.1 |
|
predictor_hidden: -1 |
|
predictor_kernel: 5 |
|
predictor_layers: 5 |
|
prenet_dropout: 0.5 |
|
prenet_hidden_size: 256 |
|
pretrain_fs_ckpt: '' |
|
processed_data_dir: xxx |
|
profile_infer: false |
|
raw_data_dir: data/raw/Unnamed |
|
ref_norm_layer: bn |
|
rel_pos: true |
|
reset_phone_dict: true |
|
residual_channels: 384 |
|
residual_layers: 20 |
|
save_best: false |
|
save_ckpt: true |
|
save_codes: |
|
- configs |
|
- modules |
|
- src |
|
- utils |
|
save_f0: true |
|
save_gt: false |
|
schedule_type: linear |
|
seed: 1234 |
|
sort_by_len: true |
|
speaker_id: Unnamed |
|
spec_max: |
|
- -0.4884430170059204 |
|
- 0.004534448496997356 |
|
- 0.5684943795204163 |
|
- 0.6527385115623474 |
|
- 0.659079372882843 |
|
- 0.7416915893554688 |
|
- 0.844637930393219 |
|
- 0.806076169013977 |
|
- 0.7238750457763672 |
|
- 0.9744535088539124 |
|
- 0.9476388692855835 |
|
- 0.9883336424827576 |
|
- 1.0821290016174316 |
|
- 1.046391248703003 |
|
- 0.9829667806625366 |
|
- 1.0163493156433105 |
|
- 0.9825412631034851 |
|
- 0.9834834337234497 |
|
- 0.9811502695083618 |
|
- 1.128888726234436 |
|
- 1.186057209968567 |
|
- 1.112004280090332 |
|
- 1.1282787322998047 |
|
- 1.051572322845459 |
|
- 1.0510444641113281 |
|
- 1.0110565423965454 |
|
- 0.9236567616462708 |
|
- 0.8036720156669617 |
|
- 0.8383486270904541 |
|
- 0.7735869288444519 |
|
- 0.9303848743438721 |
|
- 1.1257890462875366 |
|
- 1.1610286235809326 |
|
- 1.0335885286331177 |
|
- 1.0645352602005005 |
|
- 1.0619306564331055 |
|
- 1.1310148239135742 |
|
- 1.1191954612731934 |
|
- 1.1307402849197388 |
|
- 0.8837698698043823 |
|
- 1.1153966188430786 |
|
- 1.1045044660568237 |
|
- 1.0479614734649658 |
|
- 0.9491603374481201 |
|
- 0.9858523011207581 |
|
- 0.9226155281066895 |
|
- 0.9469702839851379 |
|
- 0.8791896104812622 |
|
- 0.997624933719635 |
|
- 0.9068642854690552 |
|
- 0.9575618505477905 |
|
- 0.8551340699195862 |
|
- 0.8397778272628784 |
|
- 0.8908605575561523 |
|
- 0.7986546158790588 |
|
- 0.7983465194702148 |
|
- 0.6965265274047852 |
|
- 0.640673041343689 |
|
- 0.6690735220909119 |
|
- 0.5631484985351562 |
|
- 0.48587048053741455 |
|
- 0.5326520800590515 |
|
- 0.4286036193370819 |
|
- 0.35252484679222107 |
|
- 0.3290073573589325 |
|
- 0.4754445552825928 |
|
- 0.3632410168647766 |
|
- 0.391481876373291 |
|
- 0.20288512110710144 |
|
- 0.18305960297584534 |
|
- 0.1539602279663086 |
|
- 0.03451670706272125 |
|
- -0.16881510615348816 |
|
- -0.02030198462307453 |
|
- 0.10024689882993698 |
|
- -0.023952053859829903 |
|
- 0.05635542422533035 |
|
- 0.10877621918916702 |
|
- 0.006155031267553568 |
|
- 0.07318088412284851 |
|
- 0.14075303077697754 |
|
- 0.057870157063007355 |
|
- -0.0520513579249382 |
|
- 0.1741427332162857 |
|
- -0.11464552581310272 |
|
- 0.03305494412779808 |
|
- -0.06897418200969696 |
|
- -0.12598733603954315 |
|
- -0.09894973039627075 |
|
- -0.2817802429199219 |
|
- -0.0825519785284996 |
|
- -0.3040400445461273 |
|
- -0.4998124837875366 |
|
- -0.36957985162734985 |
|
- -0.5409602522850037 |
|
- -0.49879470467567444 |
|
- -0.713716983795166 |
|
- -0.6545754671096802 |
|
- -0.6425778865814209 |
|
- -0.6178902387619019 |
|
- -0.47356730699539185 |
|
- -0.6165243983268738 |
|
- -0.5841533541679382 |
|
- -0.5759448409080505 |
|
- -0.5498068332672119 |
|
- -0.4661938548088074 |
|
- -0.5811225771903992 |
|
- -0.614664614200592 |
|
- -0.3902229070663452 |
|
- -0.7037366032600403 |
|
- -0.7260795831680298 |
|
- -0.7540019750595093 |
|
- -0.8360528945922852 |
|
- -0.8374698758125305 |
|
- -0.8328713178634644 |
|
- -0.9081047177314758 |
|
- -0.9679695963859558 |
|
- -0.9587443470954895 |
|
- -1.0706337690353394 |
|
- -0.9818469285964966 |
|
- -0.8360191583633423 |
|
- -0.9938981533050537 |
|
- -1.0823708772659302 |
|
- -1.0617167949676514 |
|
- -1.1093820333480835 |
|
- -1.1300138235092163 |
|
- -1.2141350507736206 |
|
- -1.3147293329238892 |
|
spec_min: |
|
- -4.473258972167969 |
|
- -4.244492530822754 |
|
- -4.390527725219727 |
|
- -4.209497928619385 |
|
- -4.446024417877197 |
|
- -4.3960185050964355 |
|
- -4.164802551269531 |
|
- -4.5063300132751465 |
|
- -4.608232021331787 |
|
- -4.251623630523682 |
|
- -4.4799604415893555 |
|
- -4.733210563659668 |
|
- -4.411860466003418 |
|
- -4.609100818634033 |
|
- -4.726972579956055 |
|
- -4.428761959075928 |
|
- -4.487612247467041 |
|
- -4.525552749633789 |
|
- -4.480506896972656 |
|
- -4.589383125305176 |
|
- -4.608384132385254 |
|
- -4.385376453399658 |
|
- -4.816161632537842 |
|
- -4.8706955909729 |
|
- -4.848956108093262 |
|
- -4.431278705596924 |
|
- -4.999994277954102 |
|
- -4.818373203277588 |
|
- -4.527368068695068 |
|
- -4.872085094451904 |
|
- -4.894851207733154 |
|
- -4.511948585510254 |
|
- -4.534575939178467 |
|
- -4.57792854309082 |
|
- -4.444681644439697 |
|
- -4.628803253173828 |
|
- -4.74341344833374 |
|
- -4.85427713394165 |
|
- -4.723776817321777 |
|
- -4.7166008949279785 |
|
- -4.749168395996094 |
|
- -4.67240047454834 |
|
- -4.590690612792969 |
|
- -4.576009750366211 |
|
- -4.542308330535889 |
|
- -4.890907287597656 |
|
- -4.613001823425293 |
|
- -4.494126796722412 |
|
- -4.474257946014404 |
|
- -4.574635028839111 |
|
- -4.4817585945129395 |
|
- -4.651009559631348 |
|
- -4.478254795074463 |
|
- -4.523812770843506 |
|
- -4.546536922454834 |
|
- -4.535660266876221 |
|
- -4.470296859741211 |
|
- -4.577486991882324 |
|
- -4.541748046875 |
|
- -4.428532123565674 |
|
- -4.461862564086914 |
|
- -4.489077091217041 |
|
- -4.515830039978027 |
|
- -4.395663738250732 |
|
- -4.439975738525391 |
|
- -4.4290876388549805 |
|
- -4.397741794586182 |
|
- -4.478252410888672 |
|
- -4.399686336517334 |
|
- -4.45617151260376 |
|
- -4.434477806091309 |
|
- -4.442898750305176 |
|
- -4.5840277671813965 |
|
- -4.537542819976807 |
|
- -4.492046356201172 |
|
- -4.534677505493164 |
|
- -4.477104187011719 |
|
- -4.511618614196777 |
|
- -4.387601375579834 |
|
- -4.499236106872559 |
|
- -4.3717169761657715 |
|
- -4.4242024421691895 |
|
- -4.4055657386779785 |
|
- -4.429355144500732 |
|
- -4.4636993408203125 |
|
- -4.508528232574463 |
|
- -4.515079498291016 |
|
- -4.426190376281738 |
|
- -4.433525085449219 |
|
- -4.4200215339660645 |
|
- -4.421280860900879 |
|
- -4.400143623352051 |
|
- -4.419166088104248 |
|
- -4.429825305938721 |
|
- -4.436781406402588 |
|
- -4.51550817489624 |
|
- -4.518474578857422 |
|
- -4.495880603790283 |
|
- -4.483924865722656 |
|
- -4.409562587738037 |
|
- -4.3811845779418945 |
|
- -4.411908149719238 |
|
- -4.427165985107422 |
|
- -4.396549701690674 |
|
- -4.340637683868408 |
|
- -4.405435085296631 |
|
- -4.367630481719971 |
|
- -4.419083595275879 |
|
- -4.389026165008545 |
|
- -4.371067047119141 |
|
- -4.370710372924805 |
|
- -4.3755269050598145 |
|
- -4.39500093460083 |
|
- -4.451773166656494 |
|
- -4.365351676940918 |
|
- -4.348028182983398 |
|
- -4.408270359039307 |
|
- -4.390385627746582 |
|
- -4.347931861877441 |
|
- -4.378237247467041 |
|
- -4.426717758178711 |
|
- -4.364233493804932 |
|
- -4.371546745300293 |
|
- -4.402477264404297 |
|
- -4.430750846862793 |
|
- -4.404538154602051 |
|
- -4.384459018707275 |
|
- -4.401677131652832 |
|
spk_cond_steps: [] |
|
stop_token_weight: 5.0 |
|
task_cls: training.task.SVC_task.SVCTask |
|
test_ids: [] |
|
test_input_dir: '' |
|
test_num: 0 |
|
test_prefixes: |
|
- test |
|
test_set_name: test |
|
timesteps: 1000 |
|
train_set_name: train |
|
use_crepe: false |
|
use_denoise: false |
|
use_energy_embed: false |
|
use_gt_dur: false |
|
use_gt_f0: false |
|
use_midi: false |
|
use_nsf: true |
|
use_pitch_embed: true |
|
use_pos_embed: true |
|
use_spk_embed: false |
|
use_spk_id: false |
|
use_split_spk_id: false |
|
use_uv: false |
|
use_var_enc: false |
|
use_vec: false |
|
val_check_interval: 1000 |
|
valid_num: 0 |
|
valid_set_name: valid |
|
vocoder: network.vocoders.nsf_hifigan.NsfHifiGAN |
|
vocoder_ckpt: checkpoints/nsf_hifigan/model |
|
warmup_updates: 2000 |
|
wav2spec_eps: 1.0e-06 |
|
weight_decay: 0 |
|
win_size: 2048 |
|
work_dir: checkpoints/HokoHifi |
|
|