# Source: Flash-VStream-demo / flash_vstream/eval_video/eval_any_dataset_features.py
# Commit eb0678a ("init") by zhanghaoji; 15.4 kB
# Copyright 2024 Flash-VStream Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import argparse
import subprocess
import multiprocessing
def exec(cmd, sub=False, device=None):
    """Run an external command and wait for it to finish.

    NOTE: this name shadows the builtin ``exec``. It is kept because the
    rest of this file calls it directly and passes it as a
    ``multiprocessing.Process`` target.

    NOTE: the full command is printed before it runs, including any
    ``--api_key`` value it carries.

    Args:
        cmd: A shell command string, or an argv-style list of strings.
        sub: When False, run in the current environment: a string is handed
            to the shell through ``os.system`` and a list is executed
            directly. When True, run ``cmd`` through ``subprocess.run`` with
            a copy of the current environment.
        device: Value for ``CUDA_VISIBLE_DEVICES`` in the child environment.
            Only used when ``sub`` is True; ``None`` leaves the inherited
            value untouched.
    """
    print(f'exec: {cmd}')
    if not sub:
        if isinstance(cmd, list):
            # Execute the argv list as-is. Joining it with spaces and passing
            # it to the shell would re-split arguments that contain spaces
            # and let the shell interpret characters such as ';', '&', '('.
            subprocess.run(cmd)
        else:
            os.system(cmd)
    else:
        my_env = os.environ.copy()
        if device is not None:
            # Environment values must be strings; str() also accepts an int
            # device index.
            my_env["CUDA_VISIBLE_DEVICES"] = str(device)
        subprocess.run(cmd, env=my_env)
# multi gpu, feature
def eval_msvd(args):
    """Run inference and scoring for the data under ./data/eval_video/MSVD-QA.

    Unless ``args.only_eval`` is set, one inference worker is started per
    chunk (``args.num_chunks`` in total). Worker ``i`` handles chunk ``i``
    and is given ``str(i)`` as its ``CUDA_VISIBLE_DEVICES`` value through
    ``exec``. After all workers have finished, the predictions in
    ``<model_path>/evaluation/msvd`` are scored with ``eval_activitynet_qa.py``.

    Args:
        args: Parsed command-line namespace. Reads ``model_path``,
            ``num_chunks``, ``only_eval`` and the ``api_*`` options.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "msvd")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/MSVD-QA/video_features",
                   "--gt_file", "./data/eval_video/MSVD-QA/test_qa.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1"]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the worker process
        # Scoring needs every chunk's predictions, so wait for all workers.
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16"]
    # The api_* options default to None. Forward only the ones that were
    # supplied: a None entry in the command list makes exec() fail with a
    # TypeError before the scoring script is ever started.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature
def eval_msrvtt(args):
    """Run inference and scoring for the data under ./data/eval_video/MSRVTT-QA.

    Unless ``args.only_eval`` is set, one inference worker is started per
    chunk (``args.num_chunks`` in total). Worker ``i`` handles chunk ``i``
    and is given ``str(i)`` as its ``CUDA_VISIBLE_DEVICES`` value through
    ``exec``. After all workers have finished, the predictions in
    ``<model_path>/evaluation/msrvtt`` are scored with
    ``eval_activitynet_qa.py``.

    Args:
        args: Parsed command-line namespace. Reads ``model_path``,
            ``num_chunks``, ``only_eval`` and the ``api_*`` options.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "msrvtt")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/MSRVTT-QA/video_features",
                   "--gt_file", "./data/eval_video/MSRVTT-QA/test_qa.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1"]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the worker process
        # Scoring needs every chunk's predictions, so wait for all workers.
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16"]
    # The api_* options default to None. Forward only the ones that were
    # supplied: a None entry in the command list makes exec() fail with a
    # TypeError before the scoring script is ever started.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature
def eval_actnet(args):
    """Run inference and scoring for the data under ./data/eval_video/ActivityNet-QA.

    Unless ``args.only_eval`` is set, one inference worker is started per
    chunk (``args.num_chunks`` in total). Worker ``i`` handles chunk ``i``
    and is given ``str(i)`` as its ``CUDA_VISIBLE_DEVICES`` value through
    ``exec``. After all workers have finished, the predictions in
    ``<model_path>/evaluation/actnet`` are scored with
    ``eval_activitynet_qa.py``.

    Args:
        args: Parsed command-line namespace. Reads ``model_path``,
            ``num_chunks``, ``only_eval`` and the ``api_*`` options.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "actnet")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/ActivityNet-QA/video_features",
                   "--gt_file", "./data/eval_video/ActivityNet-QA/test_qa.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1",
                   ]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the worker process
        # Scoring needs every chunk's predictions, so wait for all workers.
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16"]
    # The api_* options default to None. Forward only the ones that were
    # supplied: a None entry in the command list makes exec() fail with a
    # TypeError before the scoring script is ever started.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature
def eval_nextoe(args):  # follow msvd format, OE follow actnet
    """Run inference and scoring for the data under ./data/eval_video/nextoe.

    Unless ``args.only_eval`` is set, one inference worker is started per
    chunk (``args.num_chunks`` in total). Worker ``i`` handles chunk ``i``
    and is given ``str(i)`` as its ``CUDA_VISIBLE_DEVICES`` value through
    ``exec``. After all workers have finished, the predictions in
    ``<model_path>/evaluation/nextoe`` are scored with
    ``eval_activitynet_qa.py``.

    Args:
        args: Parsed command-line namespace. Reads ``model_path``,
            ``num_chunks``, ``only_eval`` and the ``api_*`` options.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "nextoe")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/nextoe/video_features",
                   "--gt_file", "./data/eval_video/nextoe/test_qa.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1",
                   ]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the worker process
        # Scoring needs every chunk's predictions, so wait for all workers.
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16"]
    # The api_* options default to None. Forward only the ones that were
    # supplied: a None entry in the command list makes exec() fail with a
    # TypeError before the scoring script is ever started.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature
def eval_vsmovienet(args):  # follow msvd format
    """Run inference and scoring for the movienet data under ./data/eval_video/vstream.

    Unless ``args.only_eval`` is set, one inference worker is started per
    chunk (``args.num_chunks`` in total). Worker ``i`` handles chunk ``i``
    and is given ``str(i)`` as its ``CUDA_VISIBLE_DEVICES`` value through
    ``exec``. After all workers have finished, the predictions in
    ``<model_path>/evaluation/vsmovienet`` are scored with
    ``eval_activitynet_qa.py``.

    Args:
        args: Parsed command-line namespace. Reads ``model_path``,
            ``num_chunks``, ``only_eval`` and the ``api_*`` options.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "vsmovienet")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/vstream/movienet_video_features",
                   "--gt_file", "./data/eval_video/vstream/test_qa_movienet.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1",
                   ]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the worker process
        # Scoring needs every chunk's predictions, so wait for all workers.
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16"]
    # The api_* options default to None. Forward only the ones that were
    # supplied: a None entry in the command list makes exec() fail with a
    # TypeError before the scoring script is ever started.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature
def eval_vsego4d(args):  # follow msvd format
    """Run inference and scoring for the ego4d data under ./data/eval_video/vstream.

    Unless ``args.only_eval`` is set, one inference worker is started per
    chunk (``args.num_chunks`` in total). Worker ``i`` handles chunk ``i``
    and is given ``str(i)`` as its ``CUDA_VISIBLE_DEVICES`` value through
    ``exec``. After all workers have finished, the predictions in
    ``<model_path>/evaluation/vsego4d`` are scored with
    ``eval_activitynet_qa.py``.

    Args:
        args: Parsed command-line namespace. Reads ``model_path``,
            ``num_chunks``, ``only_eval`` and the ``api_*`` options.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "vsego4d")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/vstream/ego4d_video_features",
                   "--gt_file", "./data/eval_video/vstream/test_qa_ego4d.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1",
                   ]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the worker process
        # Scoring needs every chunk's predictions, so wait for all workers.
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16"]
    # The api_* options default to None. Forward only the ones that were
    # supplied: a None entry in the command list makes exec() fail with a
    # TypeError before the scoring script is ever started.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature
def eval_realtime_vsmovienet(args):  # follow msvd format
    """Run inference and scoring for the movienet data under ./data/eval_video/vstream-realtime.

    Unless ``args.only_eval`` is set, one inference worker is started per
    chunk (``args.num_chunks`` in total). Worker ``i`` handles chunk ``i``
    and is given ``str(i)`` as its ``CUDA_VISIBLE_DEVICES`` value through
    ``exec``. After all workers have finished, the predictions in
    ``<model_path>/evaluation/realtime_vsmovienet`` are scored with
    ``eval_activitynet_qa.py``.

    Args:
        args: Parsed command-line namespace. Reads ``model_path``,
            ``num_chunks``, ``only_eval`` and the ``api_*`` options.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "realtime_vsmovienet")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/vstream-realtime/movienet_video_features",
                   "--gt_file", "./data/eval_video/vstream-realtime/test_qa_movienet.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1",
                   ]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the worker process
        # Scoring needs every chunk's predictions, so wait for all workers.
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16"]
    # The api_* options default to None. Forward only the ones that were
    # supplied: a None entry in the command list makes exec() fail with a
    # TypeError before the scoring script is ever started.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature
def eval_realtime_vsego4d(args):  # follow msvd format
    """Run inference and scoring for the ego4d data under ./data/eval_video/vstream-realtime.

    Unless ``args.only_eval`` is set, one inference worker is started per
    chunk (``args.num_chunks`` in total). Worker ``i`` handles chunk ``i``
    and is given ``str(i)`` as its ``CUDA_VISIBLE_DEVICES`` value through
    ``exec``. After all workers have finished, the predictions in
    ``<model_path>/evaluation/realtime_vsego4d`` are scored with
    ``eval_activitynet_qa.py``.

    Args:
        args: Parsed command-line namespace. Reads ``model_path``,
            ``num_chunks``, ``only_eval`` and the ``api_*`` options.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "realtime_vsego4d")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/vstream-realtime/ego4d_video_features",
                   "--gt_file", "./data/eval_video/vstream-realtime/test_qa_ego4d.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1",
                   ]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the worker process
        # Scoring needs every chunk's predictions, so wait for all workers.
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16"]
    # The api_* options default to None. Forward only the ones that were
    # supplied: a None entry in the command list makes exec() fail with a
    # TypeError before the scoring script is ever started.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
if __name__ == "__main__":
    # Command-line entry point: parse the options, then dispatch to the
    # evaluation routine registered for the requested dataset.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-path", type=str, default="facebook/opt-350m",
                        help="model path; results are written under <model-path>/evaluation")
    parser.add_argument("--dataset", type=str, default=None,
                        help="name of the dataset to evaluate")
    # The api_* options are forwarded to the scoring script.
    parser.add_argument("--api_key", type=str, default=None)
    parser.add_argument("--api_base", type=str, default=None)
    parser.add_argument("--api_type", type=str, default=None)
    parser.add_argument("--api_version", type=str, default=None)
    parser.add_argument("--num_chunks", type=int, default=1,
                        help="number of chunks, one inference worker per chunk")
    parser.add_argument("--only_eval", action="store_true",
                        help="skip inference and only run the scoring step")
    # --vizlen and --use_speech are parsed but not read anywhere in this file.
    parser.add_argument("--vizlen", type=int, default=0)
    parser.add_argument("--use_speech", action="store_true", default=False)
    args = parser.parse_args()
    func_dic = {'msvd': eval_msvd,
                'msrvtt': eval_msrvtt,
                'actnet': eval_actnet,
                'nextoe': eval_nextoe,
                'vsmovienet': eval_vsmovienet,
                'vsego4d': eval_vsego4d,
                'realtime_vsmovienet': eval_realtime_vsmovienet,
                'realtime_vsego4d': eval_realtime_vsego4d,
                }
    if args.dataset not in func_dic:
        # A missing or misspelled dataset used to fall through silently and
        # exit with status 0; report it as a usage error instead.
        parser.error(f"--dataset must be one of: {', '.join(func_dic)} (got {args.dataset!r})")
    print(f'Execute {args.dataset} evaluation')
    func_dic[args.dataset](args)