# NOTE(review): this file arrived collapsed onto a single line, which does not
# parse as YAML (`: ` inside a plain scalar). The nesting below is reconstructed
# from the key order — confirm against the consumer's expected schema.

# Path given as the value of `includes`; presumably a base config this file
# builds on — verify against the config loader.
includes: projects/task/vtt.yaml

model:
  model_cls: MMFusionSeparate
  # Explicit null instead of a bare empty value: no class is configured for
  # this key in the active setup.
  mm_encoder_cls: null
  video_encoder_cls: MMBertForEncoder
  text_encoder_cls: BertModel
  num_hidden_video_layers: 6

fairseq:
  dataset:
    batch_size: 224

# Disabled alternative kept from the original file; these key names mirror the
# `model_cls` / `mm_encoder_cls` keys under `model:` above.
# model_cls: MMFusionShare
# mm_encoder_cls: MMBertForEncoder