# model1/llava/datasets/internvid_dataset.py
# Uploaded by multitensor: "Upload folder using huggingface_hub"
# Commit bbfa6f6 (verified)
import os
import random
from llava.datasets.builder import DATASETS
from typing import Dict, Optional, Sequence, List
from llava.datasets.data_cfgs import data_configs
from llava.datasets.base_dataset import FramesTaskDataset
from llava.datasets.prompts import internvid_prompt
from llava.constants import DEFAULT_VIDEO_TOKEN
class InternVidDataset(FramesTaskDataset):
    """Dataset built on ``FramesTaskDataset`` that pairs a prompt with a caption.

    Every annotation item is read through its ``'caption'`` key; the caption
    becomes the reply to a prompt drawn at random from ``internvid_prompt``.
    """

    def __init__(self, anno_path, data_args=None, name='internvid'):
        """Pass the annotation path, data arguments and name to the base class.

        Args:
            anno_path: Annotation location, forwarded unchanged.
            data_args: Optional data arguments, forwarded unchanged.
            name: Dataset name, ``'internvid'`` unless overridden.
        """
        super().__init__(anno_path=anno_path, data_args=data_args, name=name)

    def text_preprocess(self, item) -> List[Dict[str, str]]:
        """Build the two-turn conversation for a single annotation item.

        Args:
            item: Mapping that provides the ``'caption'`` entry.

        Returns:
            A list with a ``'human'`` turn (video token followed by a randomly
            chosen prompt) and a ``'model'`` turn holding the caption.
        """
        # Read the caption before drawing the prompt so that a missing key
        # fails before the random generator is touched.
        answer = item['caption']
        question = DEFAULT_VIDEO_TOKEN + random.choice(internvid_prompt)
        return [
            {'from': 'human', 'value': question},
            {'from': 'model', 'value': answer},
        ]
@DATASETS.register_obj
def internvid(data_args):
    """Create an ``InternVidDataset`` from the configured training annotations.

    The annotation path is taken from the ``'train_data_path'`` entry of the
    ``"internvid"`` section in ``data_configs``.

    Args:
        data_args: Data arguments handed on to the dataset constructor.

    Returns:
        The newly constructed ``InternVidDataset``.
    """
    internvid_cfg = data_configs["internvid"]
    train_anno_path = internvid_cfg['train_data_path']
    return InternVidDataset(train_anno_path, data_args)