11Labs-TTS-Free-VC

Running

first commit

c56c253 almost 2 years ago

901 Bytes


	## Mel-filterbank
	# Settings for the mel filterbank. Both window values are expressed in
	# milliseconds, not in samples.
	# Length of one analysis window.
	mel_window_length = 25 # In milliseconds
	# Step between consecutive windows. Being smaller than the window length,
	# successive windows presumably overlap -- TODO confirm against the code
	# that builds the spectrogram.
	mel_window_step = 10 # In milliseconds
	# Number of mel channels (presumably the size of each spectrogram frame --
	# TODO confirm against the consumer).
	mel_n_channels = 40


	## Audio
	# Sampling rate of the audio (presumably in Hz -- TODO confirm against the
	# code that loads the waveforms).
	sampling_rate = 16000
	# Number of spectrogram frames in a partial utterance.
	# The duration noted inline matches 160 frames x the 10 ms mel_window_step.
	partials_n_frames = 160 # 1600 ms
	# Number of spectrogram frames at inference.
	# The duration noted inline matches 80 frames x the 10 ms mel_window_step.
	inference_n_frames = 80 # 800 ms


	## Voice Activity Detection (VAD)
	# Window size of the VAD. Must be either 10, 20 or 30 milliseconds.
	# This sets the granularity of the VAD and should not need to be changed.
	vad_window_length = 30 # In milliseconds
	# Number of frames averaged together by the moving-average smoothing.
	# The larger this value, the larger a variation in the VAD output must be
	# to survive the smoothing.
	vad_moving_average_width = 8
	# Maximum number of consecutive silent frames a segment can have.
	# NOTE(review): if "frames" here means VAD windows, 6 x 30 ms = 180 ms of
	# silence is tolerated inside a segment -- TODO confirm against the VAD code.
	vad_max_silence_length = 6


	## Audio volume normalization
	# Target level for volume normalization, in dBFS as the name indicates
	# (presumably decibels relative to full scale -- TODO confirm how the
	# normalization routine applies it).
	audio_norm_target_dBFS = -30