# checkpoints/sv_encoder/sv_encoder.yaml
# Night-Quiet's picture
# zjt
# 5e0c4e5
# raw
# history blame contribute delete
# 803 Bytes
# Configuration for the SenseVoiceSmall model and its two sub-components.
# Each `_target_` value is the dotted path of the class that the enclosing
# mapping configures. NOTE(review): Hydra/OmegaConf-style instantiation is
# assumed, with the sibling keys passed as constructor arguments -- confirm
# against the code that loads this file.
model:
  _target_: modules.gaudio.sv_encoder.SenseVoiceSmall

  # Spectrogram augmentation settings: frequency and time masking are
  # enabled, time warping is disabled.
  specaug:
    _target_: modules.gaudio.sv_encoder.SpecAugLFR
    apply_freq_mask: true
    apply_time_mask: true
    apply_time_warp: false
    freq_mask_width_range: [0, 30]
    lfr_rate: 6
    num_freq_mask: 1
    num_time_mask: 1
    time_mask_width_range: [0, 12]
    # NOTE(review): presumably unused while apply_time_warp is false --
    # confirm before removing.
    time_warp_mode: bicubic
    time_warp_window: 5

  # Encoder hyperparameters.
  encoder:
    _target_: modules.gaudio.sv_encoder.SenseVoiceEncoderSmall
    attention_dropout_rate: 0.1
    attention_heads: 4
    dropout_rate: 0.1
    kernel_size: 11
    linear_units: 2048
    normalize_before: true
    num_blocks: 50
    output_size: 512
    # The key is spelled "shfit" as given; it is read by name at runtime,
    # so do not "correct" it without checking the consuming class.
    sanm_shfit: 0
    tp_blocks: 20
    # Same value as the model-level input_size below.
    # NOTE(review): presumably the two must stay in sync -- confirm.
    input_size: 560

  # Model-level settings.
  length_normalized_loss: true
  input_size: 560
  vocab_size: 25055
  # NOTE(review): sos/eos/ignore_id are presumably token/label ids --
  # confirm against the tokenizer and loss code.
  sos: 1
  eos: 2
  ignore_id: -1