File size: 2,114 Bytes
0c8d55e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75

The original code is from [WISE](https://github.com/PKU-YuanGroup/WISE).

Environment (install the pinned version of the `openai` package):
```
pip install openai==0.28.0
```


## Eval

### Generate samples

```bash
# switch to univa env
MODEL_PATH='path/to/model'
OUTPUT_DIR='path/to/eval_output/wise'
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 torchrun \
  --nproc_per_node 8 \
  -m step1_gen_samples \
  wise.yaml \
  --pretrained_lvlm_name_or_path ${MODEL_PATH} \
  --output_dir ${OUTPUT_DIR}
```

### Evaluation

Evaluate with GPT-4o (`gpt-4o-2024-05-13`).
Pass your OpenAI API key via `--api_key`; it is left empty (`""`) in the commands below.

```bash
IMAGE_DIR=${OUTPUT_DIR}
python step2_gpt_eval.py \
    --json_path data/cultural_common_sense.json \
    --output_dir ${IMAGE_DIR}/Results/cultural_common_sense \
    --image_dir ${IMAGE_DIR} \
    --api_key "" \
    --model "gpt-4o-2024-05-13" \
    --result_full ${IMAGE_DIR}/Results/cultural_common_sense_full_results.json \
    --result_scores ${IMAGE_DIR}/Results/cultural_common_sense_scores_results.jsonl \
    --max_workers 96

IMAGE_DIR=${OUTPUT_DIR}
python step2_gpt_eval.py \
    --json_path data/spatio-temporal_reasoning.json \
    --output_dir ${IMAGE_DIR}/Results/spatio-temporal_reasoning \
    --image_dir ${IMAGE_DIR} \
    --api_key "" \
    --model "gpt-4o-2024-05-13" \
    --result_full ${IMAGE_DIR}/Results/spatio-temporal_reasoning_results.json \
    --result_scores ${IMAGE_DIR}/Results/spatio-temporal_reasoning_results.jsonl \
    --max_workers 96

IMAGE_DIR=${OUTPUT_DIR}
python step2_gpt_eval.py \
    --json_path data/natural_science.json \
    --output_dir ${IMAGE_DIR}/Results/natural_science \
    --image_dir ${IMAGE_DIR} \
    --api_key "" \
    --model "gpt-4o-2024-05-13" \
    --result_full ${IMAGE_DIR}/Results/natural_science_full_results.json \
    --result_scores ${IMAGE_DIR}/Results/natural_science_scores_results.jsonl \
    --max_workers 96
```

### Summary  

```bash
python step3_wise_cal.py \
    "${IMAGE_DIR}/Results/cultural_common_sense_scores_results.jsonl" \
    "${IMAGE_DIR}/Results/natural_science_scores_results.jsonl" \
    "${IMAGE_DIR}/Results/spatio-temporal_reasoning_results.jsonl" \
    --category all
```