# Constants. SAMPLE_RATE = 16000 # not higher than that otherwise we may have errors when computing the fbanks. # Train/Test sets share the same speakers. They contain different utterances. # 0.8 means 20% of the utterances of each speaker will be held out and placed in the test set. TRAIN_TEST_RATIO = 0.8 CHECKPOINTS_SOFTMAX_DIR = 'checkpoints-softmax' CHECKPOINTS_TRIPLET_DIR = 'checkpoints-triplets' BATCH_SIZE = 32 * 3 # have to be a multiple of 3. # Input to the model will be a 4D image: (batch_size, num_frames, num_fbanks, 3) # Where the 3 channels are: FBANK, DIFF(FBANK), DIFF(DIFF(FBANK)). NUM_FRAMES = 160 # 1 second ~ 100 frames with default params winlen=0.025,winstep=0.01 NUM_FBANKS = 64