# Copyright (c) 2022, salesforce.com, inc.
# All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
# For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause

# Template for a dataset configuration. The placeholder values below
# (path/to/...) are meant to be replaced with real locations.
datasets:
  dataset_name:  # same as the name of the train_config yaml file
    # data_dir: ${env.data_dir}/datasets
    data_type: images  # let it be images for now even if it is videos

    build_info:  # this is the information needed to build the dataset
      # Be careful not to append minus sign (-) before split to avoid itemizing
      ann_paths: [path/to/annotations_json]  # list of paths to annotation files
      vis_root: path/to/videos_folder
      subtitles_path: path/to/subtitles_folder
      model_name: 'llama2'  # Language Model Name (available: llama2, mistral)