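# NOTE(review): the three lines below are not configuration keys; they look like
# page-status residue picked up during extraction. Confirm and remove.
# Spaces:
# Runtime error
# Runtime error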
TRAIN:
  ENABLE: True
  DATASET: imagenet
  BATCH_SIZE: 256
  EVAL_PERIOD: 10
  CHECKPOINT_PERIOD: 1
  AUTO_RESUME: True
DATA:
  # PATH_TO_DATA_DIR: path-to-imagenet-dir
  MEAN: [0.485, 0.456, 0.406]
  STD: [0.229, 0.224, 0.225]
  NUM_FRAMES: 64
  TRAIN_CROP_SIZE: 224
  TEST_CROP_SIZE: 224
  INPUT_CHANNEL_NUM: [3]
MVIT:
  PATCH_2D: False
  ZERO_DECAY_POS_CLS: False
  MODE: "conv"
  CLS_EMBED_ON: False
  PATCH_KERNEL: [3, 7, 7]
  PATCH_STRIDE: [2, 4, 4]
  PATCH_PADDING: [1, 3, 3]
  EMBED_DIM: 96
  NUM_HEADS: 1
  MLP_RATIO: 4.0
  QKV_BIAS: True
  DROPPATH_RATE: 0.1
  DROPOUT_RATE: 0.0
  DEPTH: 16
  LAYER_SCALE_INIT_VALUE: 0.0
  HEAD_INIT_SCALE: 1.0
  USE_MEAN_POOLING: False
  USE_ABS_POS: True
  USE_FIXED_SINCOS_POS: False
  SEP_POS_EMBED: False
  REL_POS_SPATIAL: False
  REL_POS_TEMPORAL: False
  REL_POS_ZERO_INIT: False
  RESIDUAL_POOLING: False
  NORM: "layernorm"
  NORM_STEM: False
  DIM_MUL: [[1, 2.0], [3, 2.0], [14, 2.0]]
  HEAD_MUL: [[1, 2.0], [3, 2.0], [14, 2.0]]
  POOL_FIRST: null
  POOL_KVQ_KERNEL: [1, 3, 3]
  POOL_KV_STRIDE_ADAPTIVE: [1, 4, 4]
  POOL_Q_STRIDE: [[1, 1, 2, 2], [3, 1, 2, 2], [14, 1, 2, 2]]
  SEPARATE_QKV: True
  # NOTE(review): indentation was lost in extraction; REV is assumed to be a
  # sub-section of MVIT (i.e. MVIT.REV.*) — confirm against the config schema.
  REV:
    ENABLE: True
    RESPATH_FUSE: "concat"
    BUFFER_LAYERS: [1, 3, 14]
    RES_PATH: "conv"
    PRE_Q_FUSION: "concat_linear_2"
DETECTION:
  ENABLE: False
AUG:
  ENABLE: True
  COLOR_JITTER: 0.4
  AA_TYPE: rand-m9-n6-mstd0.5-inc1
  INTERPOLATION: bicubic
  RE_PROB: 0.25
  RE_MODE: pixel
  RE_COUNT: 1
  RE_SPLIT: False
MIXUP:
  ENABLE: True
  ALPHA: 0.8
  CUTMIX_ALPHA: 1.0
  PROB: 1.0
  SWITCH_PROB: 0.5
  LABEL_SMOOTH_VALUE: 0.1
SOLVER:
  BASE_LR_SCALE_NUM_SHARDS: True
  BASE_LR: 0.00025
  LR_POLICY: cosine
  MAX_EPOCH: 300
  MOMENTUM: 0.9
  WEIGHT_DECAY: 0.05
  WARMUP_EPOCHS: 70.0
  WARMUP_START_LR: 1e-8
  OPTIMIZING_METHOD: adamw
  COSINE_AFTER_WARMUP: True
  COSINE_END_LR: 1e-6
  ZERO_WD_1D_PARAM: True
  CLIP_GRAD_L2NORM: 1.0
MODEL:
  NUM_CLASSES: 1000
  ARCH: mvit
  MODEL_NAME: MViT
  LOSS_FUNC: soft_cross_entropy
  DROPOUT_RATE: 0.0
  HEAD_ACT: "softmax"
  DETACH_FINAL_FC: False
CONTRASTIVE:
  NUM_MLP_LAYERS: 1
TEST:
  ENABLE: False
  DATASET: imagenet
  BATCH_SIZE: 256
DATA_LOADER:
  NUM_WORKERS: 8
  PIN_MEMORY: True
NUM_GPUS: 2
NUM_SHARDS: 1
RNG_SEED: 0
OUTPUT_DIR: .