"""
Copyright (c) 2022, salesforce.com, inc.
All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
"""
import json
import os
import pandas as pd
from tqdm import tqdm
from lavis.common.dist_utils import main_process, get_rank
from lavis.common.registry import registry
from lavis.tasks.base_task import BaseTask
from lavis.common.utils import is_convertible_to_int, is_url, cache_url
@registry.register_task("captioning")
class CaptionTask(BaseTask):
    def __init__(
        self, num_beams, max_len, min_len, repetition_penalty, length_penalty,
        top_p, temperature, evaluate, report_metric=True, annotation_file=None,
        sample_id_key="image_id", caption_key="caption", split=["val"],
        load_gt_from_file=False, img_ids=[],
    ):
super().__init__()
self.num_beams = num_beams
self.max_len = max_len
self.min_len = min_len
self.repetition_penalty = repetition_penalty
self.length_penalty = length_penalty
self.top_p = top_p
self.temperature = temperature
self.evaluate = evaluate
self.report_metric = report_metric
self.annotation_file = annotation_file
self.sample_id_key = sample_id_key
self.caption_key = caption_key
        assert len(split) == 1, "Only one split is supported for evaluation."
self.split = split[0]
self.load_gt_from_file = load_gt_from_file
self.img_ids = img_ids
@classmethod
def setup_task(cls, cfg):
run_cfg = cfg.run_cfg
num_beams = run_cfg.get("num_beams", 5)
max_len = run_cfg.get("max_len", 30)
min_len = run_cfg.get("min_len", 1)
repetition_penalty = run_cfg.get("repetition_penalty", 1.15)
length_penalty = run_cfg.get("length_penalty", 0.)
top_p = run_cfg.get("top_p", 0.9)
temperature = run_cfg.get("temperature", 1.)
evaluate = run_cfg.evaluate
report_metric = run_cfg.get("report_metric", True)
annotation_file = run_cfg.get("annotation_file", None)
sample_id_key = run_cfg.get("sample_id_key", "image_id")
caption_key = run_cfg.get("caption_key", "caption")
load_gt_from_file = run_cfg.get("load_gt_from_file", False)
split = run_cfg.get("valid_splits", ["val"])
img_ids = run_cfg.get("img_ids", []) # evaluate only subset of imgs
return cls(
num_beams=num_beams,
max_len=max_len,
min_len=min_len,
repetition_penalty=repetition_penalty,
length_penalty=length_penalty,
top_p=top_p,
temperature=temperature,
evaluate=evaluate,
report_metric=report_metric,
annotation_file=annotation_file,
sample_id_key=sample_id_key,
caption_key=caption_key,
split=split,
load_gt_from_file=load_gt_from_file,
img_ids=img_ids
)
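    # Illustrative run-config snippet (values are examples, not taken from any
    # shipped config) showing the keys that setup_task() reads above:
    #
    #   run:
    #     task: captioning
    #     evaluate: True
    #     num_beams: 5
    #     max_len: 30
    #     min_len: 1
    #     repetition_penalty: 1.15
    #     length_penalty: 0.
    #     top_p: 0.9
    #     temperature: 1.
    #     report_metric: True
    #     valid_splits: ["val"]
    #     # optional: annotation_file, sample_id_key, caption_key,
    #     # load_gt_from_file, img_ids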
def build_datasets(self, cfg):
datasets = super().build_datasets(cfg)
# get validation dataset name
val_ds_name = []
        for name, d in datasets.items():
if self.split in d:
val_ds_name.append(name)
if not val_ds_name:
return datasets # no validation sets
        assert len(val_ds_name) == 1, "Only one dataset is supported for validation."
val_ds_name = val_ds_name[0]
        # build the ground-truth annotation file in COCO format if one was not provided
        if self.annotation_file is None:
if 'coco' not in val_ds_name: # coco is already precomputed in dataset
self.annotation_file = os.path.join(registry.get_path("cache_root"),f'{val_ds_name}_gt', f'{val_ds_name}_{self.split}_annotations.json')
if get_rank() == 0:
os.makedirs(os.path.join(registry.get_path("cache_root"),f'{val_ds_name}_gt'), exist_ok=True)
convert_to_coco_gt(datasets[val_ds_name], self.annotation_file, self.caption_key, self.sample_id_key, self.split, load_gt_from_file=self.load_gt_from_file, img_ids=self.img_ids)
return datasets
def valid_step(self, model, samples):
results = []
captions = model.generate(
samples,
use_nucleus_sampling=False,
num_beams=self.num_beams,
max_length=self.max_len,
min_length=self.min_len,
repetition_penalty=self.repetition_penalty,
length_penalty=self.length_penalty,
top_p=self.top_p,
temperature=self.temperature,
)
img_ids = samples[self.sample_id_key]
for caption, img_id in zip(captions, img_ids):
# not all img_ids are ints
img_id = int(img_id) if is_convertible_to_int(img_id) else img_id
if self.img_ids and img_id not in self.img_ids: # only include specified img_ids if specified
continue
results.append({"caption": caption, "image_id": img_id})
return results
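    # Each entry produced by valid_step() has the form
    # {"caption": <generated text>, "image_id": <int or str id>}.
    # after_evaluation() below saves these entries to a JSON result file
    # (deduplicated by "image_id") and, optionally, scores it via _report_metrics().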
def after_evaluation(self, val_result, split_name, epoch, **kwargs):
eval_result_file = self.save_result(
result=val_result,
result_dir=registry.get_path("result_dir"),
filename="{}_epoch{}".format(split_name, epoch),
remove_duplicate="image_id",
)
if self.report_metric:
metrics = self._report_metrics(
eval_result_file=eval_result_file, split_name=split_name
)
else:
metrics = {"agg_metrics": 0.0}
return metrics
@main_process
def _report_metrics(self, eval_result_file, split_name):
        if self.annotation_file is None:
# TODO better way to define this
coco_gt_root = os.path.join(registry.get_path("cache_root"), "coco_gt")
coco_val = coco_caption_eval(coco_gt_root, eval_result_file, split_name, img_ids=self.img_ids)
else:
coco_val = coco_caption_eval(None, eval_result_file, split_name, annotation_file=self.annotation_file, img_ids=self.img_ids)
agg_metrics = coco_val.eval["CIDEr"] + coco_val.eval["Bleu_4"]
log_stats = {split_name: {k: v for k, v in coco_val.eval.items()}}
with open(
os.path.join(registry.get_path("output_dir"), "evaluate.txt"), "a"
) as f:
f.write(json.dumps(log_stats) + "\n")
coco_res = {k: v for k, v in coco_val.eval.items()}
coco_res["agg_metrics"] = agg_metrics
return coco_res
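# load_gt_file() below normalizes a ground-truth file (CSV/TSV, JSONL, or JSON)
# into a list of record dicts. Two hypothetical JSON inputs it would accept
# (ids and captions are illustrative; field names other than "sample_id" and
# "caption" depend on the dataset config):
#
#   [{"image_id": 42, "caption": ["a dog on a beach", "a brown dog"]}]
#
#   {"42": "a dog on a beach", "43": {"caption": "a red car"}}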
def load_gt_file(file_path):
if is_url(file_path):
file_path = cache_url(file_path, registry.get_path("cache_root"))
data = []
if any(ext in file_path for ext in ['csv', 'tsv']):
df = pd.read_csv(file_path)
data.extend(df.to_dict(orient="records"))
elif 'jsonl' in file_path:
with open(file_path, "r") as f:
data.extend([json.loads(line) for line in f])
else:
with open(file_path, "r") as f:
loaded = json.load(f)
if isinstance(loaded, list):
data.extend(loaded)
elif isinstance(loaded, dict):
            # assume the file maps each sample id to either a dict of fields or the caption string itself
data.extend([{"sample_id": k, **v} if isinstance(v, dict) else {"sample_id": k, "caption": v} for k, v in loaded.items()])
return data
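# convert_to_coco_gt() below writes the ground truth in the COCO caption format
# expected by pycocotools/pycocoevalcap. The resulting JSON has the shape
# (ids and caption are illustrative):
#
#   {
#       "images": [{"id": 42}],
#       "annotations": [{"image_id": 42, "caption": "a dog on a beach", "id": 42}]
#   }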
def convert_to_coco_gt(data, outpath, caption_key, sample_id_key, split, load_gt_from_file=False, img_ids=[]):
gt_data = {"annotations":[], "images":[]}
if load_gt_from_file:
print(f"Generating ground truth file for evaluation from {load_gt_from_file}....")
data = load_gt_file(load_gt_from_file)
for ann in data:
captions = ann[caption_key]
img_id = int(ann[sample_id_key]) if is_convertible_to_int(ann[sample_id_key]) else ann[sample_id_key]
if img_ids and img_id not in img_ids: # only include specified img_ids if specified
continue
gt_data["images"].append({"id":img_id})
if isinstance(captions, str):
gt_data["annotations"].append({"image_id":img_id, "caption":captions, "id":img_id})
else:
gt_data["annotations"].extend([{"image_id":img_id, "caption":c, "id":img_id} for c in captions])
else:
print(f"Generating ground truth file for evaluation....")
for i,ann in tqdm(enumerate(data[split])):
captions = data[split].annotation[i][caption_key]
img_id = int(ann[sample_id_key]) if is_convertible_to_int(ann[sample_id_key]) else ann[sample_id_key]
if img_ids and img_id not in img_ids: # only include specified img_ids if specified
continue
gt_data["images"].append({"id":img_id})
if isinstance(captions, str):
gt_data["annotations"].append({"image_id":img_id, "caption":captions, "id":img_id})
else:
gt_data["annotations"].extend([{"image_id":img_id, "caption":c, "id":img_id} for c in captions])
    with open(outpath, "w") as f:
        json.dump(gt_data, f)
    print(f"Saved annotations at {outpath}")
# TODO better structure for this.
from pycocoevalcap.eval import COCOEvalCap
from pycocotools.coco import COCO
from torchvision.datasets.utils import download_url
def coco_caption_eval(coco_gt_root, results_file, split, annotation_file=None, img_ids=[]):
    if annotation_file is None:
urls = {
"val": "https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_val_gt.json",
"test": "https://storage.googleapis.com/sfr-vision-language-research/datasets/coco_karpathy_test_gt.json",
}
filenames = {
"val": "coco_karpathy_val_gt.json",
"test": "coco_karpathy_test_gt.json",
}
download_url(urls[split], coco_gt_root)
annotation_file = os.path.join(coco_gt_root, filenames[split])
if is_url(annotation_file):
annotation_file = cache_url(annotation_file, registry.get_path("cache_root"))
# create coco object and coco_result object
coco = COCO(annotation_file)
coco_result = coco.loadRes(results_file)
# create coco_eval object by taking coco and coco_result
coco_eval = COCOEvalCap(coco, coco_result)
    # optionally restrict evaluation to the subset of images present in the results file
    if img_ids:
        coco_eval.params['image_id'] = coco_result.getImgIds()
# evaluate results
# SPICE will take a few minutes the first time, but speeds up due to caching
coco_eval.evaluate()
# print output evaluation scores
for metric, score in coco_eval.eval.items():
print(f"{metric}: {score:.3f}")
return coco_eval
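# Minimal usage sketch (paths are hypothetical): score a saved caption result
# file against an annotation file produced by convert_to_coco_gt().
#
#   coco_eval = coco_caption_eval(
#       coco_gt_root=None,
#       results_file="output/result/val_epoch0.json",
#       split="val",
#       annotation_file="cache/mydataset_gt/mydataset_val_annotations.json",
#   )
#   print(coco_eval.eval["CIDEr"], coco_eval.eval["Bleu_4"])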