# Overrides layered on top of the base config named in `includes`.
# NOTE(review): the nesting below was reconstructed from a flattened,
# single-line copy of this file; confirm the key hierarchy against the
# consumer's schema (in particular that the *_encoder_cls keys are
# siblings directly under `model`).
includes: projects/task/crosstask.yaml

model:
  model_cls: MMFusionSeparateActionLocalization
  # Explicit null instead of a bare empty value: no class is set here.
  mm_encoder_cls: null
  video_encoder_cls: MMBertForEncoder
  text_encoder_cls: BertModel  # dummy, not used.
  num_hidden_video_layers: 6

fairseq:
  checkpoint:
    restore_file: runs/task/checkpoint_best.pt  # overwrite the default of VLM.