File size: 715 Bytes
aed64b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# Shared constants.

# Keep this at or below 16000: higher values may cause errors when the fbanks
# are computed.
SAMPLE_RATE = 16000

# The train and test sets contain the same speakers but different utterances.
# With a ratio of 0.8, 20% of each speaker's utterances are held out and
# placed in the test set.
TRAIN_TEST_RATIO = 0.8

CHECKPOINTS_SOFTMAX_DIR = 'checkpoints-softmax'
CHECKPOINTS_TRIPLET_DIR = 'checkpoints-triplets'

# Must be a multiple of 3.
BATCH_SIZE = 3 * 32

# The model input is a 4D image of shape (batch_size, num_frames, num_fbanks, 3).
# Its 3 channels are FBANK, DIFF(FBANK) and DIFF(DIFF(FBANK)).
# With the default params winlen=0.025,winstep=0.01, 1 second ~ 100 frames.
NUM_FRAMES = 160
NUM_FBANKS = 64