VALL-E-X / models /macros.py
Plachta's picture
initial commit
b1e1a76
raw
history blame contribute delete
201 Bytes
# Shared size constants, grouped by modality. All values are plain ints.

# ---- Text ----
# Number of text tokens (presumably the text vocabulary size; confirm against the tokenizer).
NUM_TEXT_TOKENS: int = 2048

# ---- Audio ----
# Number of audio tokens; corresponds to the EnCodec RVQ bins.
NUM_AUDIO_TOKENS: int = 1024
# Number of mel bins; tied to BigVGAN's bigvgan_24khz_100band configuration.
NUM_MEL_BINS: int = 100

# ---- Speaker ----
# Number of speaker classes (how these are consumed is not visible in this file).
NUM_SPEAKER_CLASSES: int = 4096
# Dimension of the speaker embedding (how it is consumed is not visible in this file).
SPEAKER_EMBEDDING_DIM: int = 64