andybi7676's picture
End-to-end REBORN model for LibriSpeech unsupervised phoneme recognition (iter5-stage1)
62e72b3 verified
{
"architectures": [
"RebornUASRModel"
],
"auto_map": {
"AutoConfig": "configuration_reborn.RebornUASRConfig",
"AutoModel": "modeling_reborn.RebornUASRModel"
},
"discriminator_act_after_linear": false,
"discriminator_causal": true,
"discriminator_depth": 1,
"discriminator_dilation": 1,
"discriminator_dim": 256,
"discriminator_dropout": 0.0,
"discriminator_input_dim": 44,
"discriminator_kernel": 3,
"discriminator_linear_emb": false,
"discriminator_max_pool": false,
"discriminator_spectral_norm": false,
"discriminator_weight_norm": false,
"generator_bias": false,
"generator_bn_apply": false,
"generator_bn_init_weight": 30.0,
"generator_dilation": 1,
"generator_dropout": 0.0,
"generator_input_dim": 512,
"generator_kernel": 4,
"generator_output_dim": 44,
"generator_stride": 1,
"model_type": "reborn_uasr",
"phones": [
"AH",
"N",
"S",
"IH",
"T",
"L",
"R",
"D",
"K",
"IY",
"Z",
"M",
"ER",
"EH",
"P",
"AE",
"B",
"AA",
"EY",
"F",
"OW",
"NG",
"G",
"V",
"AO",
"AY",
"SH",
"UW",
"W",
"HH",
"JH",
"Y",
"CH",
"TH",
"AW",
"UH",
"OY",
"DH",
"ZH",
"<SIL>"
],
"segmenter_dropout": 0.1,
"segmenter_hidden_dim": 512,
"segmenter_input_dim": 512,
"segmenter_kernel_size": 7,
"segmenter_type": "cnn",
"special_token_nums": 4,
"torch_dtype": "float32",
"transformers_version": "4.24.0"
}