# NOTE: page-scrape residue, not part of the script -- status banner read "Spaces: Runtime error".
# Copyright 2024 Flash-VStream Authors | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
import os | |
import argparse | |
import subprocess | |
import multiprocessing | |
def exec(cmd, sub=False, device=None):
    """Run an external command, optionally pinned to one CUDA device.

    NOTE(review): this name shadows the built-in ``exec``; it is kept because
    the rest of the file refers to the helper by this name.

    Args:
        cmd: Command to run, either an argument list or a single string.
        sub: When false, run ``cmd`` in the inherited environment. When true,
            run it with a copy of the environment in which
            ``CUDA_VISIBLE_DEVICES`` is set to ``device``.
        device: Value exported as ``CUDA_VISIBLE_DEVICES`` when ``sub`` is
            true. ``None`` leaves the inherited value untouched.

    Returns:
        The exit code of the command.
    """
    print(f'exec: {cmd}')
    if not sub:
        # Argument lists are executed directly so that arguments containing
        # spaces or shell metacharacters reach the program intact; plain
        # strings still go through the shell, as they did with os.system.
        return subprocess.run(cmd, shell=isinstance(cmd, str)).returncode
    my_env = os.environ.copy()
    if device is not None:
        # Environment values must be strings; a None entry makes
        # subprocess.run raise TypeError.
        my_env["CUDA_VISIBLE_DEVICES"] = str(device)
    return subprocess.run(cmd, env=my_env).returncode
# multi gpu, feature | |
def eval_msvd(args):
    """Run MSVD-QA inference over all chunks, then score the predictions.

    Unless ``args.only_eval`` is set, one worker process is started per chunk
    (``args.num_chunks`` in total). Each worker calls ``exec`` with the
    inference command and ``str(idx)`` as the device, and all workers are
    joined before scoring starts. The scoring script is then run on the
    prediction directory.

    Args:
        args: Parsed CLI namespace. Reads ``model_path``, ``num_chunks``,
            ``only_eval``, ``api_key``, ``api_base``, ``api_type`` and
            ``api_version``.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "msvd")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/MSVD-QA/video_features",
                   "--gt_file", "./data/eval_video/MSVD-QA/test_qa.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1"]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the child process
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16",
           ]
    # Forward only the API options that were actually supplied: they default
    # to None, and a None entry in the argument list raises TypeError when
    # the command is assembled.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature | |
def eval_msrvtt(args):
    """Run MSRVTT-QA inference over all chunks, then score the predictions.

    Unless ``args.only_eval`` is set, one worker process is started per chunk
    (``args.num_chunks`` in total). Each worker calls ``exec`` with the
    inference command and ``str(idx)`` as the device, and all workers are
    joined before scoring starts. The scoring script is then run on the
    prediction directory.

    Args:
        args: Parsed CLI namespace. Reads ``model_path``, ``num_chunks``,
            ``only_eval``, ``api_key``, ``api_base``, ``api_type`` and
            ``api_version``.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "msrvtt")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/MSRVTT-QA/video_features",
                   "--gt_file", "./data/eval_video/MSRVTT-QA/test_qa.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1"]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the child process
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16",
           ]
    # Forward only the API options that were actually supplied: they default
    # to None, and a None entry in the argument list raises TypeError when
    # the command is assembled.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature | |
def eval_actnet(args):
    """Run ActivityNet-QA inference over all chunks, then score the predictions.

    Unless ``args.only_eval`` is set, one worker process is started per chunk
    (``args.num_chunks`` in total). Each worker calls ``exec`` with the
    inference command and ``str(idx)`` as the device, and all workers are
    joined before scoring starts. The scoring script is then run on the
    prediction directory.

    Args:
        args: Parsed CLI namespace. Reads ``model_path``, ``num_chunks``,
            ``only_eval``, ``api_key``, ``api_base``, ``api_type`` and
            ``api_version``.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "actnet")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/ActivityNet-QA/video_features",
                   "--gt_file", "./data/eval_video/ActivityNet-QA/test_qa.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1",
                   ]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the child process
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16",
           ]
    # Forward only the API options that were actually supplied: they default
    # to None, and a None entry in the argument list raises TypeError when
    # the command is assembled.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature | |
def eval_nextoe(args):  # follow msvd format, OE follow actnet
    """Run nextoe inference over all chunks, then score the predictions.

    Unless ``args.only_eval`` is set, one worker process is started per chunk
    (``args.num_chunks`` in total). Each worker calls ``exec`` with the
    inference command and ``str(idx)`` as the device, and all workers are
    joined before scoring starts. The scoring script is then run on the
    prediction directory.

    Args:
        args: Parsed CLI namespace. Reads ``model_path``, ``num_chunks``,
            ``only_eval``, ``api_key``, ``api_base``, ``api_type`` and
            ``api_version``.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "nextoe")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/nextoe/video_features",
                   "--gt_file", "./data/eval_video/nextoe/test_qa.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1",
                   ]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the child process
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16",
           ]
    # Forward only the API options that were actually supplied: they default
    # to None, and a None entry in the argument list raises TypeError when
    # the command is assembled.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature | |
def eval_vsmovienet(args):  # follow msvd format
    """Run vstream MovieNet inference over all chunks, then score the predictions.

    Unless ``args.only_eval`` is set, one worker process is started per chunk
    (``args.num_chunks`` in total). Each worker calls ``exec`` with the
    inference command and ``str(idx)`` as the device, and all workers are
    joined before scoring starts. The scoring script is then run on the
    prediction directory.

    Args:
        args: Parsed CLI namespace. Reads ``model_path``, ``num_chunks``,
            ``only_eval``, ``api_key``, ``api_base``, ``api_type`` and
            ``api_version``.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "vsmovienet")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/vstream/movienet_video_features",
                   "--gt_file", "./data/eval_video/vstream/test_qa_movienet.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1",
                   ]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the child process
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16",
           ]
    # Forward only the API options that were actually supplied: they default
    # to None, and a None entry in the argument list raises TypeError when
    # the command is assembled.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature | |
def eval_vsego4d(args):  # follow msvd format
    """Run vstream Ego4D inference over all chunks, then score the predictions.

    Unless ``args.only_eval`` is set, one worker process is started per chunk
    (``args.num_chunks`` in total). Each worker calls ``exec`` with the
    inference command and ``str(idx)`` as the device, and all workers are
    joined before scoring starts. The scoring script is then run on the
    prediction directory.

    Args:
        args: Parsed CLI namespace. Reads ``model_path``, ``num_chunks``,
            ``only_eval``, ``api_key``, ``api_base``, ``api_type`` and
            ``api_version``.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "vsego4d")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/vstream/ego4d_video_features",
                   "--gt_file", "./data/eval_video/vstream/test_qa_ego4d.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1",
                   ]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the child process
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16",
           ]
    # Forward only the API options that were actually supplied: they default
    # to None, and a None entry in the argument list raises TypeError when
    # the command is assembled.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature | |
def eval_realtime_vsmovienet(args):  # follow msvd format
    """Run vstream-realtime MovieNet inference over all chunks, then score it.

    Unless ``args.only_eval`` is set, one worker process is started per chunk
    (``args.num_chunks`` in total). Each worker calls ``exec`` with the
    inference command and ``str(idx)`` as the device, and all workers are
    joined before scoring starts. The scoring script is then run on the
    prediction directory.

    Args:
        args: Parsed CLI namespace. Reads ``model_path``, ``num_chunks``,
            ``only_eval``, ``api_key``, ``api_base``, ``api_type`` and
            ``api_version``.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "realtime_vsmovienet")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/vstream-realtime/movienet_video_features",
                   "--gt_file", "./data/eval_video/vstream-realtime/test_qa_movienet.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1",
                   ]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the child process
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16",
           ]
    # Forward only the API options that were actually supplied: they default
    # to None, and a None entry in the argument list raises TypeError when
    # the command is assembled.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
# multi gpu, feature | |
def eval_realtime_vsego4d(args):  # follow msvd format
    """Run vstream-realtime Ego4D inference over all chunks, then score it.

    Unless ``args.only_eval`` is set, one worker process is started per chunk
    (``args.num_chunks`` in total). Each worker calls ``exec`` with the
    inference command and ``str(idx)`` as the device, and all workers are
    joined before scoring starts. The scoring script is then run on the
    prediction directory.

    Args:
        args: Parsed CLI namespace. Reads ``model_path``, ``num_chunks``,
            ``only_eval``, ``api_key``, ``api_base``, ``api_type`` and
            ``api_version``.
    """
    model_path = args.model_path
    num_chunks = args.num_chunks
    output_dir = os.path.join(model_path, "evaluation", "realtime_vsego4d")
    if not args.only_eval:
        processes = []
        for idx in range(num_chunks):
            cmd = ["python", "llama_vstream/eval_video/model_msvd_qa_featuresloader.py",
                   "--model-path", model_path,
                   "--video_dir", "./data/eval_video/vstream-realtime/ego4d_video_features",
                   "--gt_file", "./data/eval_video/vstream-realtime/test_qa_ego4d.json",
                   "--output_dir", output_dir,
                   "--output_name", "pred",
                   "--num-chunks", str(num_chunks),
                   "--chunk-idx", str(idx),
                   "--conv-mode", "vicuna_v1",
                   ]
            p = multiprocessing.Process(target=exec, args=(cmd, True, str(idx)))
            processes.append(p)
            p.start()  # start the child process
        for p in processes:
            p.join()
    cmd = ["python", "llama_vstream/eval_video/eval_activitynet_qa.py",
           "--pred_path", output_dir,
           "--output_dir", os.path.join(output_dir, "results"),
           "--output_json", os.path.join(output_dir, "results.json"),
           "--num_chunks", str(num_chunks),
           "--num_tasks", "16",
           ]
    # Forward only the API options that were actually supplied: they default
    # to None, and a None entry in the argument list raises TypeError when
    # the command is assembled.
    api_options = (("--api_key", args.api_key),
                   ("--api_base", args.api_base),
                   ("--api_type", args.api_type),
                   ("--api_version", args.api_version))
    for flag, value in api_options:
        if value is not None:
            cmd += [flag, value]
    exec(cmd)
if __name__ == "__main__":
    # Script entry point: parse the command line and dispatch to the
    # evaluation routine registered for the requested dataset.
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-path", type=str, default="facebook/opt-350m")
    parser.add_argument("--dataset", type=str, default=None)
    parser.add_argument("--api_key", type=str, default=None)
    parser.add_argument("--api_base", type=str, default=None)
    parser.add_argument("--api_type", type=str, default=None)
    parser.add_argument("--api_version", type=str, default=None)
    parser.add_argument("--num_chunks", type=int, default=1)
    parser.add_argument("--only_eval", action="store_true")
    # NOTE(review): --vizlen and --use_speech are accepted but not read by any
    # evaluation routine visible in this file; kept so existing command lines
    # continue to parse.
    parser.add_argument("--vizlen", type=int, default=0)
    parser.add_argument("--use_speech", action="store_true", default=False)
    args = parser.parse_args()

    # Dataset name -> evaluation routine.
    func_dic = {'msvd': eval_msvd,
                'msrvtt': eval_msrvtt,
                'actnet': eval_actnet,
                'nextoe': eval_nextoe,
                'vsmovienet': eval_vsmovienet,
                'vsego4d': eval_vsego4d,
                'realtime_vsmovienet': eval_realtime_vsmovienet,
                'realtime_vsego4d': eval_realtime_vsego4d,
                }
    if args.dataset not in func_dic:
        # A missing or misspelled dataset used to be ignored silently, which
        # made a no-op run look like a success; report it instead.
        parser.error(
            f"--dataset must be one of {list(func_dic)}, got {args.dataset!r}"
        )
    print(f'Execute {args.dataset} evaluation')
    func_dic[args.dataset](args)