# Component configuration: three component selections (frontend, model, encoder),
# each followed by a `<name>_conf` mapping that holds that component's settings.
# NOTE(review): settings are nested under the nearest preceding `*_conf` key;
# confirm this grouping against the loader that consumes the file.

# ---- Feature frontend -------------------------------------------------------
frontend: WavFrontendOnline
frontend_conf:
    fs: 16000             # same value as model_conf.sample_rate
    window: hamming
    n_mels: 80
    frame_length: 25      # same value as model_conf.frame_length_ms
    frame_shift: 10       # same value as model_conf.frame_in_ms
    dither: 0.0
    # NOTE(review): n_mels * lfr_m = 80 * 5 = 400, which equals
    # encoder_conf.input_dim; presumably these must be kept in sync -- confirm.
    lfr_m: 5
    lfr_n: 1

# ---- Model and its decision parameters ---------------------------------------
model: FsmnVADStreaming
model_conf:
    sample_rate: 16000
    detect_mode: 1
    snr_mode: 0
    # NOTE(review): the *_time / *_time_thres values below carry no unit in
    # their names; presumably milliseconds, like window_size_ms -- confirm.
    max_end_silence_time: 800
    max_start_silence_time: 3000
    do_start_point_detection: True
    do_end_point_detection: True
    window_size_ms: 200
    sil_to_speech_time_thres: 150
    speech_to_sil_time_thres: 150
    speech_2_noise_ratio: 1.0
    do_extend: 1
    lookback_time_start_point: 200
    lookahead_time_end_point: 100
    max_single_segment_time: 60000
    snr_thres: -100.0
    noise_frame_num_used_for_snr: 100
    decibel_thres: -100.0
    speech_noise_thres: 0.6
    fe_prior_thres: 0.0001
    silence_pdf_num: 1    # equals the number of entries in sil_pdf_ids
    sil_pdf_ids: [0]
    speech_noise_thresh_low: -0.1
    speech_noise_thresh_high: 0.3
    output_frame_probs: False
    frame_in_ms: 10
    frame_length_ms: 25

# ---- Encoder network ---------------------------------------------------------
encoder: FSMN
encoder_conf:
    input_dim: 400
    input_affine_dim: 140
    fsmn_layers: 4
    linear_dim: 250
    proj_dim: 128
    # lorder/lstride and rorder/rstride are paired settings; both right-side
    # values are 0 in this configuration.
    lorder: 20
    rorder: 0
    lstride: 1
    rstride: 0
    output_affine_dim: 140
    output_dim: 248