bobox committed on
Commit
4b4d7d3
·
verified ·
1 Parent(s): 3db76eb

Training in progress, step 774, checkpoint

Browse files
checkpoint-774/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 1536,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-774/README.md ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-774/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-774/config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v2-xlarge",
3
+ "architectures": [
4
+ "DebertaV2Model"
5
+ ],
6
+ "attention_head_size": 64,
7
+ "attention_probs_dropout_prob": 0.1,
8
+ "conv_act": "gelu",
9
+ "conv_kernel_size": 3,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 1536,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 6144,
15
+ "layer_norm_eps": 1e-07,
16
+ "max_position_embeddings": 512,
17
+ "max_relative_positions": -1,
18
+ "model_type": "deberta-v2",
19
+ "norm_rel_ebd": "layer_norm",
20
+ "num_attention_heads": 24,
21
+ "num_hidden_layers": 24,
22
+ "pad_token_id": 0,
23
+ "pooler_dropout": 0,
24
+ "pooler_hidden_act": "gelu",
25
+ "pooler_hidden_size": 1536,
26
+ "pos_att_type": [
27
+ "p2c",
28
+ "c2p"
29
+ ],
30
+ "position_biased_input": false,
31
+ "position_buckets": 256,
32
+ "relative_attention": true,
33
+ "share_att_key": true,
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.42.4",
36
+ "type_vocab_size": 0,
37
+ "vocab_size": 128100
38
+ }
checkpoint-774/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.0.1",
4
+ "transformers": "4.42.4",
5
+ "pytorch": "2.4.0+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
checkpoint-774/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
checkpoint-774/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:662df4f2f0fbeb6e1b3a99a805f4d11322ba02902a285bdd26ee1e996fb17a17
3
+ size 7077084396
checkpoint-774/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7185152f00bf7f56635781b1edde77058bdbfaa9fd90ac19033b9463509aee12
3
+ size 3538506546
checkpoint-774/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05031475c60a441593bf4e55fb01f55e97ed5066ad950e7e50bf95424e9fdfb5
3
+ size 14244
checkpoint-774/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:237b146b1bcb3ad2d4b4853ed36f6f02f210b151266e0c3b3aa976abc44aeb00
3
+ size 1064
checkpoint-774/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
checkpoint-774/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-774/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5598d5e96f339a8d980c15f9afd405a2e5e1be7db41de3ed13b0f03fac1e8c17
3
+ size 2447305
checkpoint-774/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-774/tokenizer_config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": true,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "mask_token": "[MASK]",
50
+ "model_max_length": 1000000000000000019884624838656,
51
+ "pad_token": "[PAD]",
52
+ "sep_token": "[SEP]",
53
+ "sp_model_kwargs": {},
54
+ "split_by_punct": false,
55
+ "tokenizer_class": "DebertaV2Tokenizer",
56
+ "unk_token": "[UNK]",
57
+ "vocab_type": "spm"
58
+ }
checkpoint-774/trainer_state.json ADDED
@@ -0,0 +1,1650 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.40082858622475404,
5
+ "eval_steps": 97,
6
+ "global_step": 774,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.010357327809425169,
13
+ "grad_norm": 160.78689575195312,
14
+ "learning_rate": 3.529411764705882e-07,
15
+ "loss": 10.2062,
16
+ "step": 20
17
+ },
18
+ {
19
+ "epoch": 0.020714655618850338,
20
+ "grad_norm": 303.3471374511719,
21
+ "learning_rate": 8.000000000000001e-07,
22
+ "loss": 7.9221,
23
+ "step": 40
24
+ },
25
+ {
26
+ "epoch": 0.031071983428275506,
27
+ "grad_norm": 81.0146484375,
28
+ "learning_rate": 1.2235294117647059e-06,
29
+ "loss": 5.9499,
30
+ "step": 60
31
+ },
32
+ {
33
+ "epoch": 0.041429311237700675,
34
+ "grad_norm": 88.74897003173828,
35
+ "learning_rate": 1.6470588235294118e-06,
36
+ "loss": 6.0555,
37
+ "step": 80
38
+ },
39
+ {
40
+ "epoch": 0.050233039875712066,
41
+ "eval_Qnli-dev_cosine_accuracy": 0.62109375,
42
+ "eval_Qnli-dev_cosine_accuracy_threshold": 0.9108127355575562,
43
+ "eval_Qnli-dev_cosine_ap": 0.6197524033200674,
44
+ "eval_Qnli-dev_cosine_f1": 0.6396396396396397,
45
+ "eval_Qnli-dev_cosine_f1_threshold": 0.8376526832580566,
46
+ "eval_Qnli-dev_cosine_precision": 0.49534883720930234,
47
+ "eval_Qnli-dev_cosine_recall": 0.902542372881356,
48
+ "eval_Qnli-dev_dot_accuracy": 0.58984375,
49
+ "eval_Qnli-dev_dot_accuracy_threshold": 865.2555541992188,
50
+ "eval_Qnli-dev_dot_ap": 0.5567642852275692,
51
+ "eval_Qnli-dev_dot_f1": 0.6363636363636364,
52
+ "eval_Qnli-dev_dot_f1_threshold": 691.9456787109375,
53
+ "eval_Qnli-dev_dot_precision": 0.47863247863247865,
54
+ "eval_Qnli-dev_dot_recall": 0.9491525423728814,
55
+ "eval_Qnli-dev_euclidean_accuracy": 0.609375,
56
+ "eval_Qnli-dev_euclidean_accuracy_threshold": 13.323524475097656,
57
+ "eval_Qnli-dev_euclidean_ap": 0.6115116478210071,
58
+ "eval_Qnli-dev_euclidean_f1": 0.6449612403100775,
59
+ "eval_Qnli-dev_euclidean_f1_threshold": 16.585830688476562,
60
+ "eval_Qnli-dev_euclidean_precision": 0.508557457212714,
61
+ "eval_Qnli-dev_euclidean_recall": 0.8813559322033898,
62
+ "eval_Qnli-dev_manhattan_accuracy": 0.619140625,
63
+ "eval_Qnli-dev_manhattan_accuracy_threshold": 406.1038818359375,
64
+ "eval_Qnli-dev_manhattan_ap": 0.609904024113499,
65
+ "eval_Qnli-dev_manhattan_f1": 0.6494345718901454,
66
+ "eval_Qnli-dev_manhattan_f1_threshold": 484.52716064453125,
67
+ "eval_Qnli-dev_manhattan_precision": 0.5248041775456919,
68
+ "eval_Qnli-dev_manhattan_recall": 0.8516949152542372,
69
+ "eval_Qnli-dev_max_accuracy": 0.62109375,
70
+ "eval_Qnli-dev_max_accuracy_threshold": 865.2555541992188,
71
+ "eval_Qnli-dev_max_ap": 0.6197524033200674,
72
+ "eval_Qnli-dev_max_f1": 0.6494345718901454,
73
+ "eval_Qnli-dev_max_f1_threshold": 691.9456787109375,
74
+ "eval_Qnli-dev_max_precision": 0.5248041775456919,
75
+ "eval_Qnli-dev_max_recall": 0.9491525423728814,
76
+ "eval_allNLI-dev_cosine_accuracy": 0.67578125,
77
+ "eval_allNLI-dev_cosine_accuracy_threshold": 0.9652533531188965,
78
+ "eval_allNLI-dev_cosine_ap": 0.4282858392784667,
79
+ "eval_allNLI-dev_cosine_f1": 0.515527950310559,
80
+ "eval_allNLI-dev_cosine_f1_threshold": 0.798592746257782,
81
+ "eval_allNLI-dev_cosine_precision": 0.3524416135881104,
82
+ "eval_allNLI-dev_cosine_recall": 0.9595375722543352,
83
+ "eval_allNLI-dev_dot_accuracy": 0.666015625,
84
+ "eval_allNLI-dev_dot_accuracy_threshold": 968.9529418945312,
85
+ "eval_allNLI-dev_dot_ap": 0.36425260705842155,
86
+ "eval_allNLI-dev_dot_f1": 0.5162287480680062,
87
+ "eval_allNLI-dev_dot_f1_threshold": 686.5814208984375,
88
+ "eval_allNLI-dev_dot_precision": 0.35232067510548526,
89
+ "eval_allNLI-dev_dot_recall": 0.9653179190751445,
90
+ "eval_allNLI-dev_euclidean_accuracy": 0.67578125,
91
+ "eval_allNLI-dev_euclidean_accuracy_threshold": 8.16073226928711,
92
+ "eval_allNLI-dev_euclidean_ap": 0.4333583117036793,
93
+ "eval_allNLI-dev_euclidean_f1": 0.5164319248826291,
94
+ "eval_allNLI-dev_euclidean_f1_threshold": 18.877037048339844,
95
+ "eval_allNLI-dev_euclidean_precision": 0.3540772532188841,
96
+ "eval_allNLI-dev_euclidean_recall": 0.953757225433526,
97
+ "eval_allNLI-dev_manhattan_accuracy": 0.67578125,
98
+ "eval_allNLI-dev_manhattan_accuracy_threshold": 226.18099975585938,
99
+ "eval_allNLI-dev_manhattan_ap": 0.4400955405569059,
100
+ "eval_allNLI-dev_manhattan_f1": 0.5179407176287052,
101
+ "eval_allNLI-dev_manhattan_f1_threshold": 570.2012329101562,
102
+ "eval_allNLI-dev_manhattan_precision": 0.3547008547008547,
103
+ "eval_allNLI-dev_manhattan_recall": 0.9595375722543352,
104
+ "eval_allNLI-dev_max_accuracy": 0.67578125,
105
+ "eval_allNLI-dev_max_accuracy_threshold": 968.9529418945312,
106
+ "eval_allNLI-dev_max_ap": 0.4400955405569059,
107
+ "eval_allNLI-dev_max_f1": 0.5179407176287052,
108
+ "eval_allNLI-dev_max_f1_threshold": 686.5814208984375,
109
+ "eval_allNLI-dev_max_precision": 0.3547008547008547,
110
+ "eval_allNLI-dev_max_recall": 0.9653179190751445,
111
+ "eval_sequential_score": 0.6197524033200674,
112
+ "eval_sts-test_pearson_cosine": 0.6170839897033953,
113
+ "eval_sts-test_pearson_dot": 0.43346770865150264,
114
+ "eval_sts-test_pearson_euclidean": 0.6474775644966124,
115
+ "eval_sts-test_pearson_manhattan": 0.6616828287248389,
116
+ "eval_sts-test_pearson_max": 0.6616828287248389,
117
+ "eval_sts-test_spearman_cosine": 0.6552392427969004,
118
+ "eval_sts-test_spearman_dot": 0.4585595522909849,
119
+ "eval_sts-test_spearman_euclidean": 0.652406174691995,
120
+ "eval_sts-test_spearman_manhattan": 0.6662387448368152,
121
+ "eval_sts-test_spearman_max": 0.6662387448368152,
122
+ "eval_vitaminc-pairs_loss": 3.7554073333740234,
123
+ "eval_vitaminc-pairs_runtime": 4.7418,
124
+ "eval_vitaminc-pairs_samples_per_second": 26.994,
125
+ "eval_vitaminc-pairs_steps_per_second": 0.211,
126
+ "step": 97
127
+ },
128
+ {
129
+ "epoch": 0.050233039875712066,
130
+ "eval_negation-triplets_loss": 3.6897997856140137,
131
+ "eval_negation-triplets_runtime": 3.1578,
132
+ "eval_negation-triplets_samples_per_second": 40.534,
133
+ "eval_negation-triplets_steps_per_second": 0.317,
134
+ "step": 97
135
+ },
136
+ {
137
+ "epoch": 0.050233039875712066,
138
+ "eval_scitail-pairs-pos_loss": 0.45563364028930664,
139
+ "eval_scitail-pairs-pos_runtime": 2.7223,
140
+ "eval_scitail-pairs-pos_samples_per_second": 47.02,
141
+ "eval_scitail-pairs-pos_steps_per_second": 0.367,
142
+ "step": 97
143
+ },
144
+ {
145
+ "epoch": 0.050233039875712066,
146
+ "eval_scitail-pairs-qa_loss": 1.115855097770691,
147
+ "eval_scitail-pairs-qa_runtime": 2.2597,
148
+ "eval_scitail-pairs-qa_samples_per_second": 56.646,
149
+ "eval_scitail-pairs-qa_steps_per_second": 0.443,
150
+ "step": 97
151
+ },
152
+ {
153
+ "epoch": 0.050233039875712066,
154
+ "eval_xsum-pairs_loss": 3.2118453979492188,
155
+ "eval_xsum-pairs_runtime": 3.0538,
156
+ "eval_xsum-pairs_samples_per_second": 41.915,
157
+ "eval_xsum-pairs_steps_per_second": 0.327,
158
+ "step": 97
159
+ },
160
+ {
161
+ "epoch": 0.050233039875712066,
162
+ "eval_sciq_pairs_loss": 0.484823614358902,
163
+ "eval_sciq_pairs_runtime": 3.959,
164
+ "eval_sciq_pairs_samples_per_second": 32.331,
165
+ "eval_sciq_pairs_steps_per_second": 0.253,
166
+ "step": 97
167
+ },
168
+ {
169
+ "epoch": 0.050233039875712066,
170
+ "eval_qasc_pairs_loss": 2.8566131591796875,
171
+ "eval_qasc_pairs_runtime": 2.1087,
172
+ "eval_qasc_pairs_samples_per_second": 60.701,
173
+ "eval_qasc_pairs_steps_per_second": 0.474,
174
+ "step": 97
175
+ },
176
+ {
177
+ "epoch": 0.050233039875712066,
178
+ "eval_openbookqa_pairs_loss": 2.1501104831695557,
179
+ "eval_openbookqa_pairs_runtime": 2.2555,
180
+ "eval_openbookqa_pairs_samples_per_second": 56.751,
181
+ "eval_openbookqa_pairs_steps_per_second": 0.443,
182
+ "step": 97
183
+ },
184
+ {
185
+ "epoch": 0.050233039875712066,
186
+ "eval_msmarco_pairs_loss": 4.395960807800293,
187
+ "eval_msmarco_pairs_runtime": 2.2407,
188
+ "eval_msmarco_pairs_samples_per_second": 57.125,
189
+ "eval_msmarco_pairs_steps_per_second": 0.446,
190
+ "step": 97
191
+ },
192
+ {
193
+ "epoch": 0.050233039875712066,
194
+ "eval_nq_pairs_loss": 4.488173484802246,
195
+ "eval_nq_pairs_runtime": 2.7484,
196
+ "eval_nq_pairs_samples_per_second": 46.572,
197
+ "eval_nq_pairs_steps_per_second": 0.364,
198
+ "step": 97
199
+ },
200
+ {
201
+ "epoch": 0.050233039875712066,
202
+ "eval_trivia_pairs_loss": 4.023955345153809,
203
+ "eval_trivia_pairs_runtime": 3.7908,
204
+ "eval_trivia_pairs_samples_per_second": 33.766,
205
+ "eval_trivia_pairs_steps_per_second": 0.264,
206
+ "step": 97
207
+ },
208
+ {
209
+ "epoch": 0.050233039875712066,
210
+ "eval_gooaq_pairs_loss": 3.383638858795166,
211
+ "eval_gooaq_pairs_runtime": 2.1349,
212
+ "eval_gooaq_pairs_samples_per_second": 59.957,
213
+ "eval_gooaq_pairs_steps_per_second": 0.468,
214
+ "step": 97
215
+ },
216
+ {
217
+ "epoch": 0.050233039875712066,
218
+ "eval_paws-pos_loss": 0.12275903671979904,
219
+ "eval_paws-pos_runtime": 2.2818,
220
+ "eval_paws-pos_samples_per_second": 56.095,
221
+ "eval_paws-pos_steps_per_second": 0.438,
222
+ "step": 97
223
+ },
224
+ {
225
+ "epoch": 0.050233039875712066,
226
+ "eval_global_dataset_loss": 1.9564138650894165,
227
+ "eval_global_dataset_runtime": 10.2217,
228
+ "eval_global_dataset_samples_per_second": 40.698,
229
+ "eval_global_dataset_steps_per_second": 0.391,
230
+ "step": 97
231
+ },
232
+ {
233
+ "epoch": 0.05178663904712584,
234
+ "grad_norm": 83.8360824584961,
235
+ "learning_rate": 2.1176470588235296e-06,
236
+ "loss": 4.0315,
237
+ "step": 100
238
+ },
239
+ {
240
+ "epoch": 0.06214396685655101,
241
+ "grad_norm": 325.5680236816406,
242
+ "learning_rate": 2.588235294117647e-06,
243
+ "loss": 1.6348,
244
+ "step": 120
245
+ },
246
+ {
247
+ "epoch": 0.07250129466597618,
248
+ "grad_norm": 106.99758911132812,
249
+ "learning_rate": 3.0588235294117647e-06,
250
+ "loss": 1.1866,
251
+ "step": 140
252
+ },
253
+ {
254
+ "epoch": 0.08285862247540135,
255
+ "grad_norm": 30.390771865844727,
256
+ "learning_rate": 3.5294117647058825e-06,
257
+ "loss": 0.6138,
258
+ "step": 160
259
+ },
260
+ {
261
+ "epoch": 0.09321595028482652,
262
+ "grad_norm": 39.691532135009766,
263
+ "learning_rate": 4e-06,
264
+ "loss": 0.5244,
265
+ "step": 180
266
+ },
267
+ {
268
+ "epoch": 0.10046607975142413,
269
+ "eval_Qnli-dev_cosine_accuracy": 0.666015625,
270
+ "eval_Qnli-dev_cosine_accuracy_threshold": 0.7696025967597961,
271
+ "eval_Qnli-dev_cosine_ap": 0.693851901846308,
272
+ "eval_Qnli-dev_cosine_f1": 0.6625000000000001,
273
+ "eval_Qnli-dev_cosine_f1_threshold": 0.6638460159301758,
274
+ "eval_Qnli-dev_cosine_precision": 0.5247524752475248,
275
+ "eval_Qnli-dev_cosine_recall": 0.8983050847457628,
276
+ "eval_Qnli-dev_dot_accuracy": 0.6796875,
277
+ "eval_Qnli-dev_dot_accuracy_threshold": 822.6981201171875,
278
+ "eval_Qnli-dev_dot_ap": 0.6603086879421342,
279
+ "eval_Qnli-dev_dot_f1": 0.657856093979442,
280
+ "eval_Qnli-dev_dot_f1_threshold": 618.4547119140625,
281
+ "eval_Qnli-dev_dot_precision": 0.503370786516854,
282
+ "eval_Qnli-dev_dot_recall": 0.9491525423728814,
283
+ "eval_Qnli-dev_euclidean_accuracy": 0.666015625,
284
+ "eval_Qnli-dev_euclidean_accuracy_threshold": 19.874027252197266,
285
+ "eval_Qnli-dev_euclidean_ap": 0.694158709095853,
286
+ "eval_Qnli-dev_euclidean_f1": 0.6630236794171221,
287
+ "eval_Qnli-dev_euclidean_f1_threshold": 23.005264282226562,
288
+ "eval_Qnli-dev_euclidean_precision": 0.5814696485623003,
289
+ "eval_Qnli-dev_euclidean_recall": 0.7711864406779662,
290
+ "eval_Qnli-dev_manhattan_accuracy": 0.66796875,
291
+ "eval_Qnli-dev_manhattan_accuracy_threshold": 624.285888671875,
292
+ "eval_Qnli-dev_manhattan_ap": 0.692633721446368,
293
+ "eval_Qnli-dev_manhattan_f1": 0.6641366223908918,
294
+ "eval_Qnli-dev_manhattan_f1_threshold": 696.709716796875,
295
+ "eval_Qnli-dev_manhattan_precision": 0.6013745704467354,
296
+ "eval_Qnli-dev_manhattan_recall": 0.7415254237288136,
297
+ "eval_Qnli-dev_max_accuracy": 0.6796875,
298
+ "eval_Qnli-dev_max_accuracy_threshold": 822.6981201171875,
299
+ "eval_Qnli-dev_max_ap": 0.694158709095853,
300
+ "eval_Qnli-dev_max_f1": 0.6641366223908918,
301
+ "eval_Qnli-dev_max_f1_threshold": 696.709716796875,
302
+ "eval_Qnli-dev_max_precision": 0.6013745704467354,
303
+ "eval_Qnli-dev_max_recall": 0.9491525423728814,
304
+ "eval_allNLI-dev_cosine_accuracy": 0.701171875,
305
+ "eval_allNLI-dev_cosine_accuracy_threshold": 0.854247510433197,
306
+ "eval_allNLI-dev_cosine_ap": 0.5504250327111149,
307
+ "eval_allNLI-dev_cosine_f1": 0.567287784679089,
308
+ "eval_allNLI-dev_cosine_f1_threshold": 0.7080726623535156,
309
+ "eval_allNLI-dev_cosine_precision": 0.44193548387096776,
310
+ "eval_allNLI-dev_cosine_recall": 0.791907514450867,
311
+ "eval_allNLI-dev_dot_accuracy": 0.69921875,
312
+ "eval_allNLI-dev_dot_accuracy_threshold": 885.8963623046875,
313
+ "eval_allNLI-dev_dot_ap": 0.5371398846089106,
314
+ "eval_allNLI-dev_dot_f1": 0.5720338983050848,
315
+ "eval_allNLI-dev_dot_f1_threshold": 732.1597290039062,
316
+ "eval_allNLI-dev_dot_precision": 0.451505016722408,
317
+ "eval_allNLI-dev_dot_recall": 0.7803468208092486,
318
+ "eval_allNLI-dev_euclidean_accuracy": 0.701171875,
319
+ "eval_allNLI-dev_euclidean_accuracy_threshold": 16.9801082611084,
320
+ "eval_allNLI-dev_euclidean_ap": 0.5503780840587245,
321
+ "eval_allNLI-dev_euclidean_f1": 0.5671641791044777,
322
+ "eval_allNLI-dev_euclidean_f1_threshold": 24.19074821472168,
323
+ "eval_allNLI-dev_euclidean_precision": 0.44932432432432434,
324
+ "eval_allNLI-dev_euclidean_recall": 0.7687861271676301,
325
+ "eval_allNLI-dev_manhattan_accuracy": 0.703125,
326
+ "eval_allNLI-dev_manhattan_accuracy_threshold": 529.9462280273438,
327
+ "eval_allNLI-dev_manhattan_ap": 0.5524969745859143,
328
+ "eval_allNLI-dev_manhattan_f1": 0.5638297872340425,
329
+ "eval_allNLI-dev_manhattan_f1_threshold": 826.8560791015625,
330
+ "eval_allNLI-dev_manhattan_precision": 0.40664961636828645,
331
+ "eval_allNLI-dev_manhattan_recall": 0.9190751445086706,
332
+ "eval_allNLI-dev_max_accuracy": 0.703125,
333
+ "eval_allNLI-dev_max_accuracy_threshold": 885.8963623046875,
334
+ "eval_allNLI-dev_max_ap": 0.5524969745859143,
335
+ "eval_allNLI-dev_max_f1": 0.5720338983050848,
336
+ "eval_allNLI-dev_max_f1_threshold": 826.8560791015625,
337
+ "eval_allNLI-dev_max_precision": 0.451505016722408,
338
+ "eval_allNLI-dev_max_recall": 0.9190751445086706,
339
+ "eval_sequential_score": 0.694158709095853,
340
+ "eval_sts-test_pearson_cosine": 0.8866994033223972,
341
+ "eval_sts-test_pearson_dot": 0.8712266973511624,
342
+ "eval_sts-test_pearson_euclidean": 0.9028053322103908,
343
+ "eval_sts-test_pearson_manhattan": 0.9029714248344419,
344
+ "eval_sts-test_pearson_max": 0.9029714248344419,
345
+ "eval_sts-test_spearman_cosine": 0.8941879764786184,
346
+ "eval_sts-test_spearman_dot": 0.8632849034222648,
347
+ "eval_sts-test_spearman_euclidean": 0.8944520984233506,
348
+ "eval_sts-test_spearman_manhattan": 0.8945218656398598,
349
+ "eval_sts-test_spearman_max": 0.8945218656398598,
350
+ "eval_vitaminc-pairs_loss": 3.507073163986206,
351
+ "eval_vitaminc-pairs_runtime": 4.4774,
352
+ "eval_vitaminc-pairs_samples_per_second": 28.588,
353
+ "eval_vitaminc-pairs_steps_per_second": 0.223,
354
+ "step": 194
355
+ },
356
+ {
357
+ "epoch": 0.10046607975142413,
358
+ "eval_negation-triplets_loss": 1.1223009824752808,
359
+ "eval_negation-triplets_runtime": 3.102,
360
+ "eval_negation-triplets_samples_per_second": 41.264,
361
+ "eval_negation-triplets_steps_per_second": 0.322,
362
+ "step": 194
363
+ },
364
+ {
365
+ "epoch": 0.10046607975142413,
366
+ "eval_scitail-pairs-pos_loss": 0.06560208648443222,
367
+ "eval_scitail-pairs-pos_runtime": 2.6151,
368
+ "eval_scitail-pairs-pos_samples_per_second": 48.946,
369
+ "eval_scitail-pairs-pos_steps_per_second": 0.382,
370
+ "step": 194
371
+ },
372
+ {
373
+ "epoch": 0.10046607975142413,
374
+ "eval_scitail-pairs-qa_loss": 0.044671397656202316,
375
+ "eval_scitail-pairs-qa_runtime": 2.2115,
376
+ "eval_scitail-pairs-qa_samples_per_second": 57.879,
377
+ "eval_scitail-pairs-qa_steps_per_second": 0.452,
378
+ "step": 194
379
+ },
380
+ {
381
+ "epoch": 0.10046607975142413,
382
+ "eval_xsum-pairs_loss": 0.07691845297813416,
383
+ "eval_xsum-pairs_runtime": 3.043,
384
+ "eval_xsum-pairs_samples_per_second": 42.064,
385
+ "eval_xsum-pairs_steps_per_second": 0.329,
386
+ "step": 194
387
+ },
388
+ {
389
+ "epoch": 0.10046607975142413,
390
+ "eval_sciq_pairs_loss": 0.12039273232221603,
391
+ "eval_sciq_pairs_runtime": 3.878,
392
+ "eval_sciq_pairs_samples_per_second": 33.007,
393
+ "eval_sciq_pairs_steps_per_second": 0.258,
394
+ "step": 194
395
+ },
396
+ {
397
+ "epoch": 0.10046607975142413,
398
+ "eval_qasc_pairs_loss": 0.36198654770851135,
399
+ "eval_qasc_pairs_runtime": 2.0543,
400
+ "eval_qasc_pairs_samples_per_second": 62.307,
401
+ "eval_qasc_pairs_steps_per_second": 0.487,
402
+ "step": 194
403
+ },
404
+ {
405
+ "epoch": 0.10046607975142413,
406
+ "eval_openbookqa_pairs_loss": 0.5711529850959778,
407
+ "eval_openbookqa_pairs_runtime": 2.2213,
408
+ "eval_openbookqa_pairs_samples_per_second": 57.624,
409
+ "eval_openbookqa_pairs_steps_per_second": 0.45,
410
+ "step": 194
411
+ },
412
+ {
413
+ "epoch": 0.10046607975142413,
414
+ "eval_msmarco_pairs_loss": 0.3250836133956909,
415
+ "eval_msmarco_pairs_runtime": 2.22,
416
+ "eval_msmarco_pairs_samples_per_second": 57.657,
417
+ "eval_msmarco_pairs_steps_per_second": 0.45,
418
+ "step": 194
419
+ },
420
+ {
421
+ "epoch": 0.10046607975142413,
422
+ "eval_nq_pairs_loss": 0.4249531030654907,
423
+ "eval_nq_pairs_runtime": 2.7189,
424
+ "eval_nq_pairs_samples_per_second": 47.079,
425
+ "eval_nq_pairs_steps_per_second": 0.368,
426
+ "step": 194
427
+ },
428
+ {
429
+ "epoch": 0.10046607975142413,
430
+ "eval_trivia_pairs_loss": 0.2965388894081116,
431
+ "eval_trivia_pairs_runtime": 3.7556,
432
+ "eval_trivia_pairs_samples_per_second": 34.082,
433
+ "eval_trivia_pairs_steps_per_second": 0.266,
434
+ "step": 194
435
+ },
436
+ {
437
+ "epoch": 0.10046607975142413,
438
+ "eval_gooaq_pairs_loss": 0.2151084989309311,
439
+ "eval_gooaq_pairs_runtime": 2.1122,
440
+ "eval_gooaq_pairs_samples_per_second": 60.601,
441
+ "eval_gooaq_pairs_steps_per_second": 0.473,
442
+ "step": 194
443
+ },
444
+ {
445
+ "epoch": 0.10046607975142413,
446
+ "eval_paws-pos_loss": 0.0295370165258646,
447
+ "eval_paws-pos_runtime": 2.2123,
448
+ "eval_paws-pos_samples_per_second": 57.86,
449
+ "eval_paws-pos_steps_per_second": 0.452,
450
+ "step": 194
451
+ },
452
+ {
453
+ "epoch": 0.10046607975142413,
454
+ "eval_global_dataset_loss": 0.35498398542404175,
455
+ "eval_global_dataset_runtime": 10.1407,
456
+ "eval_global_dataset_samples_per_second": 41.023,
457
+ "eval_global_dataset_steps_per_second": 0.394,
458
+ "step": 194
459
+ },
460
+ {
461
+ "epoch": 0.10357327809425168,
462
+ "grad_norm": 43.11693572998047,
463
+ "learning_rate": 4.470588235294118e-06,
464
+ "loss": 0.376,
465
+ "step": 200
466
+ },
467
+ {
468
+ "epoch": 0.11393060590367685,
469
+ "grad_norm": 31.125375747680664,
470
+ "learning_rate": 4.941176470588235e-06,
471
+ "loss": 0.2782,
472
+ "step": 220
473
+ },
474
+ {
475
+ "epoch": 0.12428793371310203,
476
+ "grad_norm": 14.048110961914062,
477
+ "learning_rate": 5.411764705882353e-06,
478
+ "loss": 0.2391,
479
+ "step": 240
480
+ },
481
+ {
482
+ "epoch": 0.13464526152252718,
483
+ "grad_norm": 5.956579685211182,
484
+ "learning_rate": 5.882352941176471e-06,
485
+ "loss": 0.2767,
486
+ "step": 260
487
+ },
488
+ {
489
+ "epoch": 0.14500258933195237,
490
+ "grad_norm": 14.470146179199219,
491
+ "learning_rate": 6.352941176470589e-06,
492
+ "loss": 0.2359,
493
+ "step": 280
494
+ },
495
+ {
496
+ "epoch": 0.1506991196271362,
497
+ "eval_Qnli-dev_cosine_accuracy": 0.6875,
498
+ "eval_Qnli-dev_cosine_accuracy_threshold": 0.7567152976989746,
499
+ "eval_Qnli-dev_cosine_ap": 0.7133123361631746,
500
+ "eval_Qnli-dev_cosine_f1": 0.6853146853146853,
501
+ "eval_Qnli-dev_cosine_f1_threshold": 0.6536699533462524,
502
+ "eval_Qnli-dev_cosine_precision": 0.5833333333333334,
503
+ "eval_Qnli-dev_cosine_recall": 0.8305084745762712,
504
+ "eval_Qnli-dev_dot_accuracy": 0.673828125,
505
+ "eval_Qnli-dev_dot_accuracy_threshold": 731.5150756835938,
506
+ "eval_Qnli-dev_dot_ap": 0.6890325242500185,
507
+ "eval_Qnli-dev_dot_f1": 0.6782006920415226,
508
+ "eval_Qnli-dev_dot_f1_threshold": 621.156982421875,
509
+ "eval_Qnli-dev_dot_precision": 0.5730994152046783,
510
+ "eval_Qnli-dev_dot_recall": 0.8305084745762712,
511
+ "eval_Qnli-dev_euclidean_accuracy": 0.6875,
512
+ "eval_Qnli-dev_euclidean_accuracy_threshold": 21.166996002197266,
513
+ "eval_Qnli-dev_euclidean_ap": 0.717782618584373,
514
+ "eval_Qnli-dev_euclidean_f1": 0.6832740213523131,
515
+ "eval_Qnli-dev_euclidean_f1_threshold": 25.534191131591797,
516
+ "eval_Qnli-dev_euclidean_precision": 0.588957055214724,
517
+ "eval_Qnli-dev_euclidean_recall": 0.8135593220338984,
518
+ "eval_Qnli-dev_manhattan_accuracy": 0.689453125,
519
+ "eval_Qnli-dev_manhattan_accuracy_threshold": 717.0855712890625,
520
+ "eval_Qnli-dev_manhattan_ap": 0.7178394918687495,
521
+ "eval_Qnli-dev_manhattan_f1": 0.6815068493150686,
522
+ "eval_Qnli-dev_manhattan_f1_threshold": 809.9966430664062,
523
+ "eval_Qnli-dev_manhattan_precision": 0.5718390804597702,
524
+ "eval_Qnli-dev_manhattan_recall": 0.8432203389830508,
525
+ "eval_Qnli-dev_max_accuracy": 0.689453125,
526
+ "eval_Qnli-dev_max_accuracy_threshold": 731.5150756835938,
527
+ "eval_Qnli-dev_max_ap": 0.7178394918687495,
528
+ "eval_Qnli-dev_max_f1": 0.6853146853146853,
529
+ "eval_Qnli-dev_max_f1_threshold": 809.9966430664062,
530
+ "eval_Qnli-dev_max_precision": 0.588957055214724,
531
+ "eval_Qnli-dev_max_recall": 0.8432203389830508,
532
+ "eval_allNLI-dev_cosine_accuracy": 0.71484375,
533
+ "eval_allNLI-dev_cosine_accuracy_threshold": 0.8485724329948425,
534
+ "eval_allNLI-dev_cosine_ap": 0.5777522094864251,
535
+ "eval_allNLI-dev_cosine_f1": 0.5925925925925926,
536
+ "eval_allNLI-dev_cosine_f1_threshold": 0.7124052047729492,
537
+ "eval_allNLI-dev_cosine_precision": 0.4942084942084942,
538
+ "eval_allNLI-dev_cosine_recall": 0.7398843930635838,
539
+ "eval_allNLI-dev_dot_accuracy": 0.71484375,
540
+ "eval_allNLI-dev_dot_accuracy_threshold": 835.6192016601562,
541
+ "eval_allNLI-dev_dot_ap": 0.5708546535940942,
542
+ "eval_allNLI-dev_dot_f1": 0.5931372549019609,
543
+ "eval_allNLI-dev_dot_f1_threshold": 712.94482421875,
544
+ "eval_allNLI-dev_dot_precision": 0.5148936170212766,
545
+ "eval_allNLI-dev_dot_recall": 0.6994219653179191,
546
+ "eval_allNLI-dev_euclidean_accuracy": 0.712890625,
547
+ "eval_allNLI-dev_euclidean_accuracy_threshold": 15.772256851196289,
548
+ "eval_allNLI-dev_euclidean_ap": 0.5773033114664347,
549
+ "eval_allNLI-dev_euclidean_f1": 0.5957446808510639,
550
+ "eval_allNLI-dev_euclidean_f1_threshold": 24.513042449951172,
551
+ "eval_allNLI-dev_euclidean_precision": 0.4713804713804714,
552
+ "eval_allNLI-dev_euclidean_recall": 0.8092485549132948,
553
+ "eval_allNLI-dev_manhattan_accuracy": 0.71484375,
554
+ "eval_allNLI-dev_manhattan_accuracy_threshold": 494.4720153808594,
555
+ "eval_allNLI-dev_manhattan_ap": 0.5787277750430182,
556
+ "eval_allNLI-dev_manhattan_f1": 0.597457627118644,
557
+ "eval_allNLI-dev_manhattan_f1_threshold": 764.1075439453125,
558
+ "eval_allNLI-dev_manhattan_precision": 0.47157190635451507,
559
+ "eval_allNLI-dev_manhattan_recall": 0.815028901734104,
560
+ "eval_allNLI-dev_max_accuracy": 0.71484375,
561
+ "eval_allNLI-dev_max_accuracy_threshold": 835.6192016601562,
562
+ "eval_allNLI-dev_max_ap": 0.5787277750430182,
563
+ "eval_allNLI-dev_max_f1": 0.597457627118644,
564
+ "eval_allNLI-dev_max_f1_threshold": 764.1075439453125,
565
+ "eval_allNLI-dev_max_precision": 0.5148936170212766,
566
+ "eval_allNLI-dev_max_recall": 0.815028901734104,
567
+ "eval_sequential_score": 0.7178394918687495,
568
+ "eval_sts-test_pearson_cosine": 0.9080888281681364,
569
+ "eval_sts-test_pearson_dot": 0.8993720999648187,
570
+ "eval_sts-test_pearson_euclidean": 0.9185021221297063,
571
+ "eval_sts-test_pearson_manhattan": 0.9182084064307341,
572
+ "eval_sts-test_pearson_max": 0.9185021221297063,
573
+ "eval_sts-test_spearman_cosine": 0.9145502926755805,
574
+ "eval_sts-test_spearman_dot": 0.8990795555767088,
575
+ "eval_sts-test_spearman_euclidean": 0.9143005806370166,
576
+ "eval_sts-test_spearman_manhattan": 0.9141107457861942,
577
+ "eval_sts-test_spearman_max": 0.9145502926755805,
578
+ "eval_vitaminc-pairs_loss": 3.4645299911499023,
579
+ "eval_vitaminc-pairs_runtime": 4.4497,
580
+ "eval_vitaminc-pairs_samples_per_second": 28.766,
581
+ "eval_vitaminc-pairs_steps_per_second": 0.225,
582
+ "step": 291
583
+ },
584
+ {
585
+ "epoch": 0.1506991196271362,
586
+ "eval_negation-triplets_loss": 0.8774887323379517,
587
+ "eval_negation-triplets_runtime": 3.1401,
588
+ "eval_negation-triplets_samples_per_second": 40.764,
589
+ "eval_negation-triplets_steps_per_second": 0.318,
590
+ "step": 291
591
+ },
592
+ {
593
+ "epoch": 0.1506991196271362,
594
+ "eval_scitail-pairs-pos_loss": 0.029673559591174126,
595
+ "eval_scitail-pairs-pos_runtime": 2.6642,
596
+ "eval_scitail-pairs-pos_samples_per_second": 48.044,
597
+ "eval_scitail-pairs-pos_steps_per_second": 0.375,
598
+ "step": 291
599
+ },
600
+ {
601
+ "epoch": 0.1506991196271362,
602
+ "eval_scitail-pairs-qa_loss": 0.011800204403698444,
603
+ "eval_scitail-pairs-qa_runtime": 2.1861,
604
+ "eval_scitail-pairs-qa_samples_per_second": 58.551,
605
+ "eval_scitail-pairs-qa_steps_per_second": 0.457,
606
+ "step": 291
607
+ },
608
+ {
609
+ "epoch": 0.1506991196271362,
610
+ "eval_xsum-pairs_loss": 0.017930012196302414,
611
+ "eval_xsum-pairs_runtime": 3.0255,
612
+ "eval_xsum-pairs_samples_per_second": 42.307,
613
+ "eval_xsum-pairs_steps_per_second": 0.331,
614
+ "step": 291
615
+ },
616
+ {
617
+ "epoch": 0.1506991196271362,
618
+ "eval_sciq_pairs_loss": 0.09765021502971649,
619
+ "eval_sciq_pairs_runtime": 3.8726,
620
+ "eval_sciq_pairs_samples_per_second": 33.053,
621
+ "eval_sciq_pairs_steps_per_second": 0.258,
622
+ "step": 291
623
+ },
624
+ {
625
+ "epoch": 0.1506991196271362,
626
+ "eval_qasc_pairs_loss": 0.3064229488372803,
627
+ "eval_qasc_pairs_runtime": 2.1307,
628
+ "eval_qasc_pairs_samples_per_second": 60.075,
629
+ "eval_qasc_pairs_steps_per_second": 0.469,
630
+ "step": 291
631
+ },
632
+ {
633
+ "epoch": 0.1506991196271362,
634
+ "eval_openbookqa_pairs_loss": 0.46111759543418884,
635
+ "eval_openbookqa_pairs_runtime": 2.2685,
636
+ "eval_openbookqa_pairs_samples_per_second": 56.424,
637
+ "eval_openbookqa_pairs_steps_per_second": 0.441,
638
+ "step": 291
639
+ },
640
+ {
641
+ "epoch": 0.1506991196271362,
642
+ "eval_msmarco_pairs_loss": 0.08168309926986694,
643
+ "eval_msmarco_pairs_runtime": 2.2657,
644
+ "eval_msmarco_pairs_samples_per_second": 56.495,
645
+ "eval_msmarco_pairs_steps_per_second": 0.441,
646
+ "step": 291
647
+ },
648
+ {
649
+ "epoch": 0.1506991196271362,
650
+ "eval_nq_pairs_loss": 0.13220462203025818,
651
+ "eval_nq_pairs_runtime": 2.7139,
652
+ "eval_nq_pairs_samples_per_second": 47.164,
653
+ "eval_nq_pairs_steps_per_second": 0.368,
654
+ "step": 291
655
+ },
656
+ {
657
+ "epoch": 0.1506991196271362,
658
+ "eval_trivia_pairs_loss": 0.1532345414161682,
659
+ "eval_trivia_pairs_runtime": 3.76,
660
+ "eval_trivia_pairs_samples_per_second": 34.043,
661
+ "eval_trivia_pairs_steps_per_second": 0.266,
662
+ "step": 291
663
+ },
664
+ {
665
+ "epoch": 0.1506991196271362,
666
+ "eval_gooaq_pairs_loss": 0.10126010328531265,
667
+ "eval_gooaq_pairs_runtime": 2.1372,
668
+ "eval_gooaq_pairs_samples_per_second": 59.892,
669
+ "eval_gooaq_pairs_steps_per_second": 0.468,
670
+ "step": 291
671
+ },
672
+ {
673
+ "epoch": 0.1506991196271362,
674
+ "eval_paws-pos_loss": 0.021147189661860466,
675
+ "eval_paws-pos_runtime": 2.2138,
676
+ "eval_paws-pos_samples_per_second": 57.819,
677
+ "eval_paws-pos_steps_per_second": 0.452,
678
+ "step": 291
679
+ },
680
+ {
681
+ "epoch": 0.1506991196271362,
682
+ "eval_global_dataset_loss": 0.2509276270866394,
683
+ "eval_global_dataset_runtime": 10.154,
684
+ "eval_global_dataset_samples_per_second": 40.969,
685
+ "eval_global_dataset_steps_per_second": 0.394,
686
+ "step": 291
687
+ },
688
+ {
689
+ "epoch": 0.15535991714137753,
690
+ "grad_norm": 6.319842338562012,
691
+ "learning_rate": 6.823529411764706e-06,
692
+ "loss": 0.1505,
693
+ "step": 300
694
+ },
695
+ {
696
+ "epoch": 0.1657172449508027,
697
+ "grad_norm": 16.11246109008789,
698
+ "learning_rate": 7.294117647058823e-06,
699
+ "loss": 0.1473,
700
+ "step": 320
701
+ },
702
+ {
703
+ "epoch": 0.17607457276022787,
704
+ "grad_norm": 9.587472915649414,
705
+ "learning_rate": 7.764705882352943e-06,
706
+ "loss": 0.1614,
707
+ "step": 340
708
+ },
709
+ {
710
+ "epoch": 0.18643190056965303,
711
+ "grad_norm": 21.551036834716797,
712
+ "learning_rate": 8.23529411764706e-06,
713
+ "loss": 0.1834,
714
+ "step": 360
715
+ },
716
+ {
717
+ "epoch": 0.1967892283790782,
718
+ "grad_norm": 7.1668548583984375,
719
+ "learning_rate": 8.705882352941177e-06,
720
+ "loss": 0.164,
721
+ "step": 380
722
+ },
723
+ {
724
+ "epoch": 0.20093215950284826,
725
+ "eval_Qnli-dev_cosine_accuracy": 0.685546875,
726
+ "eval_Qnli-dev_cosine_accuracy_threshold": 0.6639062166213989,
727
+ "eval_Qnli-dev_cosine_ap": 0.7263308094103806,
728
+ "eval_Qnli-dev_cosine_f1": 0.6986754966887417,
729
+ "eval_Qnli-dev_cosine_f1_threshold": 0.576126754283905,
730
+ "eval_Qnli-dev_cosine_precision": 0.5733695652173914,
731
+ "eval_Qnli-dev_cosine_recall": 0.8940677966101694,
732
+ "eval_Qnli-dev_dot_accuracy": 0.677734375,
733
+ "eval_Qnli-dev_dot_accuracy_threshold": 591.922607421875,
734
+ "eval_Qnli-dev_dot_ap": 0.7011149935901715,
735
+ "eval_Qnli-dev_dot_f1": 0.6837606837606838,
736
+ "eval_Qnli-dev_dot_f1_threshold": 505.88946533203125,
737
+ "eval_Qnli-dev_dot_precision": 0.5730659025787965,
738
+ "eval_Qnli-dev_dot_recall": 0.847457627118644,
739
+ "eval_Qnli-dev_euclidean_accuracy": 0.693359375,
740
+ "eval_Qnli-dev_euclidean_accuracy_threshold": 23.223190307617188,
741
+ "eval_Qnli-dev_euclidean_ap": 0.7305021849937567,
742
+ "eval_Qnli-dev_euclidean_f1": 0.698205546492659,
743
+ "eval_Qnli-dev_euclidean_f1_threshold": 27.346588134765625,
744
+ "eval_Qnli-dev_euclidean_precision": 0.5676392572944297,
745
+ "eval_Qnli-dev_euclidean_recall": 0.9067796610169492,
746
+ "eval_Qnli-dev_manhattan_accuracy": 0.6953125,
747
+ "eval_Qnli-dev_manhattan_accuracy_threshold": 733.809814453125,
748
+ "eval_Qnli-dev_manhattan_ap": 0.7286452491858623,
749
+ "eval_Qnli-dev_manhattan_f1": 0.7008264462809918,
750
+ "eval_Qnli-dev_manhattan_f1_threshold": 845.1378784179688,
751
+ "eval_Qnli-dev_manhattan_precision": 0.5745257452574526,
752
+ "eval_Qnli-dev_manhattan_recall": 0.8983050847457628,
753
+ "eval_Qnli-dev_max_accuracy": 0.6953125,
754
+ "eval_Qnli-dev_max_accuracy_threshold": 733.809814453125,
755
+ "eval_Qnli-dev_max_ap": 0.7305021849937567,
756
+ "eval_Qnli-dev_max_f1": 0.7008264462809918,
757
+ "eval_Qnli-dev_max_f1_threshold": 845.1378784179688,
758
+ "eval_Qnli-dev_max_precision": 0.5745257452574526,
759
+ "eval_Qnli-dev_max_recall": 0.9067796610169492,
760
+ "eval_allNLI-dev_cosine_accuracy": 0.720703125,
761
+ "eval_allNLI-dev_cosine_accuracy_threshold": 0.8122999668121338,
762
+ "eval_allNLI-dev_cosine_ap": 0.6057945129739214,
763
+ "eval_allNLI-dev_cosine_f1": 0.6099585062240664,
764
+ "eval_allNLI-dev_cosine_f1_threshold": 0.6289657950401306,
765
+ "eval_allNLI-dev_cosine_precision": 0.47572815533980584,
766
+ "eval_allNLI-dev_cosine_recall": 0.8497109826589595,
767
+ "eval_allNLI-dev_dot_accuracy": 0.71875,
768
+ "eval_allNLI-dev_dot_accuracy_threshold": 745.8334350585938,
769
+ "eval_allNLI-dev_dot_ap": 0.5916353965674287,
770
+ "eval_allNLI-dev_dot_f1": 0.610655737704918,
771
+ "eval_allNLI-dev_dot_f1_threshold": 540.4627075195312,
772
+ "eval_allNLI-dev_dot_precision": 0.473015873015873,
773
+ "eval_allNLI-dev_dot_recall": 0.861271676300578,
774
+ "eval_allNLI-dev_euclidean_accuracy": 0.71875,
775
+ "eval_allNLI-dev_euclidean_accuracy_threshold": 18.420812606811523,
776
+ "eval_allNLI-dev_euclidean_ap": 0.6036307863078971,
777
+ "eval_allNLI-dev_euclidean_f1": 0.6182572614107884,
778
+ "eval_allNLI-dev_euclidean_f1_threshold": 25.34260368347168,
779
+ "eval_allNLI-dev_euclidean_precision": 0.48220064724919093,
780
+ "eval_allNLI-dev_euclidean_recall": 0.861271676300578,
781
+ "eval_allNLI-dev_manhattan_accuracy": 0.72265625,
782
+ "eval_allNLI-dev_manhattan_accuracy_threshold": 600.5528564453125,
783
+ "eval_allNLI-dev_manhattan_ap": 0.60455800678133,
784
+ "eval_allNLI-dev_manhattan_f1": 0.6170212765957447,
785
+ "eval_allNLI-dev_manhattan_f1_threshold": 781.2642822265625,
786
+ "eval_allNLI-dev_manhattan_precision": 0.4882154882154882,
787
+ "eval_allNLI-dev_manhattan_recall": 0.838150289017341,
788
+ "eval_allNLI-dev_max_accuracy": 0.72265625,
789
+ "eval_allNLI-dev_max_accuracy_threshold": 745.8334350585938,
790
+ "eval_allNLI-dev_max_ap": 0.6057945129739214,
791
+ "eval_allNLI-dev_max_f1": 0.6182572614107884,
792
+ "eval_allNLI-dev_max_f1_threshold": 781.2642822265625,
793
+ "eval_allNLI-dev_max_precision": 0.4882154882154882,
794
+ "eval_allNLI-dev_max_recall": 0.861271676300578,
795
+ "eval_sequential_score": 0.7305021849937567,
796
+ "eval_sts-test_pearson_cosine": 0.9127094359947289,
797
+ "eval_sts-test_pearson_dot": 0.895861018162462,
798
+ "eval_sts-test_pearson_euclidean": 0.9199643208978237,
799
+ "eval_sts-test_pearson_manhattan": 0.9196227536115376,
800
+ "eval_sts-test_pearson_max": 0.9199643208978237,
801
+ "eval_sts-test_spearman_cosine": 0.9147098309224408,
802
+ "eval_sts-test_spearman_dot": 0.8908736085574486,
803
+ "eval_sts-test_spearman_euclidean": 0.9157628281029806,
804
+ "eval_sts-test_spearman_manhattan": 0.9154038334840987,
805
+ "eval_sts-test_spearman_max": 0.9157628281029806,
806
+ "eval_vitaminc-pairs_loss": 2.9898574352264404,
807
+ "eval_vitaminc-pairs_runtime": 4.5557,
808
+ "eval_vitaminc-pairs_samples_per_second": 28.097,
809
+ "eval_vitaminc-pairs_steps_per_second": 0.22,
810
+ "step": 388
811
+ },
812
+ {
813
+ "epoch": 0.20093215950284826,
814
+ "eval_negation-triplets_loss": 0.761246919631958,
815
+ "eval_negation-triplets_runtime": 3.3164,
816
+ "eval_negation-triplets_samples_per_second": 38.596,
817
+ "eval_negation-triplets_steps_per_second": 0.302,
818
+ "step": 388
819
+ },
820
+ {
821
+ "epoch": 0.20093215950284826,
822
+ "eval_scitail-pairs-pos_loss": 0.0324205681681633,
823
+ "eval_scitail-pairs-pos_runtime": 2.7411,
824
+ "eval_scitail-pairs-pos_samples_per_second": 46.696,
825
+ "eval_scitail-pairs-pos_steps_per_second": 0.365,
826
+ "step": 388
827
+ },
828
+ {
829
+ "epoch": 0.20093215950284826,
830
+ "eval_scitail-pairs-qa_loss": 0.0026867901906371117,
831
+ "eval_scitail-pairs-qa_runtime": 2.4223,
832
+ "eval_scitail-pairs-qa_samples_per_second": 52.843,
833
+ "eval_scitail-pairs-qa_steps_per_second": 0.413,
834
+ "step": 388
835
+ },
836
+ {
837
+ "epoch": 0.20093215950284826,
838
+ "eval_xsum-pairs_loss": 0.00942266546189785,
839
+ "eval_xsum-pairs_runtime": 3.1951,
840
+ "eval_xsum-pairs_samples_per_second": 40.061,
841
+ "eval_xsum-pairs_steps_per_second": 0.313,
842
+ "step": 388
843
+ },
844
+ {
845
+ "epoch": 0.20093215950284826,
846
+ "eval_sciq_pairs_loss": 0.09475678950548172,
847
+ "eval_sciq_pairs_runtime": 4.1048,
848
+ "eval_sciq_pairs_samples_per_second": 31.183,
849
+ "eval_sciq_pairs_steps_per_second": 0.244,
850
+ "step": 388
851
+ },
852
+ {
853
+ "epoch": 0.20093215950284826,
854
+ "eval_qasc_pairs_loss": 0.2342282086610794,
855
+ "eval_qasc_pairs_runtime": 2.27,
856
+ "eval_qasc_pairs_samples_per_second": 56.388,
857
+ "eval_qasc_pairs_steps_per_second": 0.441,
858
+ "step": 388
859
+ },
860
+ {
861
+ "epoch": 0.20093215950284826,
862
+ "eval_openbookqa_pairs_loss": 0.4459604024887085,
863
+ "eval_openbookqa_pairs_runtime": 2.3513,
864
+ "eval_openbookqa_pairs_samples_per_second": 54.438,
865
+ "eval_openbookqa_pairs_steps_per_second": 0.425,
866
+ "step": 388
867
+ },
868
+ {
869
+ "epoch": 0.20093215950284826,
870
+ "eval_msmarco_pairs_loss": 0.2047792673110962,
871
+ "eval_msmarco_pairs_runtime": 2.3749,
872
+ "eval_msmarco_pairs_samples_per_second": 53.897,
873
+ "eval_msmarco_pairs_steps_per_second": 0.421,
874
+ "step": 388
875
+ },
876
+ {
877
+ "epoch": 0.20093215950284826,
878
+ "eval_nq_pairs_loss": 0.12687399983406067,
879
+ "eval_nq_pairs_runtime": 2.8216,
880
+ "eval_nq_pairs_samples_per_second": 45.365,
881
+ "eval_nq_pairs_steps_per_second": 0.354,
882
+ "step": 388
883
+ },
884
+ {
885
+ "epoch": 0.20093215950284826,
886
+ "eval_trivia_pairs_loss": 0.208355113863945,
887
+ "eval_trivia_pairs_runtime": 3.8421,
888
+ "eval_trivia_pairs_samples_per_second": 33.315,
889
+ "eval_trivia_pairs_steps_per_second": 0.26,
890
+ "step": 388
891
+ },
892
+ {
893
+ "epoch": 0.20093215950284826,
894
+ "eval_gooaq_pairs_loss": 0.10170701891183853,
895
+ "eval_gooaq_pairs_runtime": 2.3264,
896
+ "eval_gooaq_pairs_samples_per_second": 55.02,
897
+ "eval_gooaq_pairs_steps_per_second": 0.43,
898
+ "step": 388
899
+ },
900
+ {
901
+ "epoch": 0.20093215950284826,
902
+ "eval_paws-pos_loss": 0.0226531233638525,
903
+ "eval_paws-pos_runtime": 2.4028,
904
+ "eval_paws-pos_samples_per_second": 53.271,
905
+ "eval_paws-pos_steps_per_second": 0.416,
906
+ "step": 388
907
+ },
908
+ {
909
+ "epoch": 0.20093215950284826,
910
+ "eval_global_dataset_loss": 0.22992311418056488,
911
+ "eval_global_dataset_runtime": 10.4483,
912
+ "eval_global_dataset_samples_per_second": 39.815,
913
+ "eval_global_dataset_steps_per_second": 0.383,
914
+ "step": 388
915
+ },
916
+ {
917
+ "epoch": 0.20714655618850336,
918
+ "grad_norm": 0.033974967896938324,
919
+ "learning_rate": 9.176470588235295e-06,
920
+ "loss": 0.1426,
921
+ "step": 400
922
+ },
923
+ {
924
+ "epoch": 0.21750388399792853,
925
+ "grad_norm": 8.04489517211914,
926
+ "learning_rate": 9.647058823529412e-06,
927
+ "loss": 0.1838,
928
+ "step": 420
929
+ },
930
+ {
931
+ "epoch": 0.2278612118073537,
932
+ "grad_norm": 37.961544036865234,
933
+ "learning_rate": 1.0117647058823531e-05,
934
+ "loss": 0.1324,
935
+ "step": 440
936
+ },
937
+ {
938
+ "epoch": 0.23821853961677888,
939
+ "grad_norm": 9.86117172241211,
940
+ "learning_rate": 1.0588235294117648e-05,
941
+ "loss": 0.1242,
942
+ "step": 460
943
+ },
944
+ {
945
+ "epoch": 0.24857586742620405,
946
+ "grad_norm": 15.386984825134277,
947
+ "learning_rate": 1.1058823529411766e-05,
948
+ "loss": 0.2166,
949
+ "step": 480
950
+ },
951
+ {
952
+ "epoch": 0.25116519937856036,
953
+ "eval_Qnli-dev_cosine_accuracy": 0.70703125,
954
+ "eval_Qnli-dev_cosine_accuracy_threshold": 0.6728878021240234,
955
+ "eval_Qnli-dev_cosine_ap": 0.7490927864840249,
956
+ "eval_Qnli-dev_cosine_f1": 0.7092198581560283,
957
+ "eval_Qnli-dev_cosine_f1_threshold": 0.6182924509048462,
958
+ "eval_Qnli-dev_cosine_precision": 0.6097560975609756,
959
+ "eval_Qnli-dev_cosine_recall": 0.847457627118644,
960
+ "eval_Qnli-dev_dot_accuracy": 0.67578125,
961
+ "eval_Qnli-dev_dot_accuracy_threshold": 664.639404296875,
962
+ "eval_Qnli-dev_dot_ap": 0.7143774472576185,
963
+ "eval_Qnli-dev_dot_f1": 0.7084019769357496,
964
+ "eval_Qnli-dev_dot_f1_threshold": 506.4283447265625,
965
+ "eval_Qnli-dev_dot_precision": 0.5795148247978437,
966
+ "eval_Qnli-dev_dot_recall": 0.9110169491525424,
967
+ "eval_Qnli-dev_euclidean_accuracy": 0.7109375,
968
+ "eval_Qnli-dev_euclidean_accuracy_threshold": 23.347135543823242,
969
+ "eval_Qnli-dev_euclidean_ap": 0.7578270539486094,
970
+ "eval_Qnli-dev_euclidean_f1": 0.712041884816754,
971
+ "eval_Qnli-dev_euclidean_f1_threshold": 26.101980209350586,
972
+ "eval_Qnli-dev_euclidean_precision": 0.6053412462908012,
973
+ "eval_Qnli-dev_euclidean_recall": 0.864406779661017,
974
+ "eval_Qnli-dev_manhattan_accuracy": 0.71484375,
975
+ "eval_Qnli-dev_manhattan_accuracy_threshold": 734.9889526367188,
976
+ "eval_Qnli-dev_manhattan_ap": 0.7578518420666434,
977
+ "eval_Qnli-dev_manhattan_f1": 0.7160940325497287,
978
+ "eval_Qnli-dev_manhattan_f1_threshold": 797.8458251953125,
979
+ "eval_Qnli-dev_manhattan_precision": 0.6246056782334385,
980
+ "eval_Qnli-dev_manhattan_recall": 0.8389830508474576,
981
+ "eval_Qnli-dev_max_accuracy": 0.71484375,
982
+ "eval_Qnli-dev_max_accuracy_threshold": 734.9889526367188,
983
+ "eval_Qnli-dev_max_ap": 0.7578518420666434,
984
+ "eval_Qnli-dev_max_f1": 0.7160940325497287,
985
+ "eval_Qnli-dev_max_f1_threshold": 797.8458251953125,
986
+ "eval_Qnli-dev_max_precision": 0.6246056782334385,
987
+ "eval_Qnli-dev_max_recall": 0.9110169491525424,
988
+ "eval_allNLI-dev_cosine_accuracy": 0.712890625,
989
+ "eval_allNLI-dev_cosine_accuracy_threshold": 0.8226721286773682,
990
+ "eval_allNLI-dev_cosine_ap": 0.608903927832523,
991
+ "eval_allNLI-dev_cosine_f1": 0.6211764705882353,
992
+ "eval_allNLI-dev_cosine_f1_threshold": 0.6668639183044434,
993
+ "eval_allNLI-dev_cosine_precision": 0.5238095238095238,
994
+ "eval_allNLI-dev_cosine_recall": 0.7630057803468208,
995
+ "eval_allNLI-dev_dot_accuracy": 0.72265625,
996
+ "eval_allNLI-dev_dot_accuracy_threshold": 701.8555908203125,
997
+ "eval_allNLI-dev_dot_ap": 0.6006292150580212,
998
+ "eval_allNLI-dev_dot_f1": 0.6206896551724138,
999
+ "eval_allNLI-dev_dot_f1_threshold": 543.5947265625,
1000
+ "eval_allNLI-dev_dot_precision": 0.4948453608247423,
1001
+ "eval_allNLI-dev_dot_recall": 0.8323699421965318,
1002
+ "eval_allNLI-dev_euclidean_accuracy": 0.716796875,
1003
+ "eval_allNLI-dev_euclidean_accuracy_threshold": 21.63890266418457,
1004
+ "eval_allNLI-dev_euclidean_ap": 0.6064044650997461,
1005
+ "eval_allNLI-dev_euclidean_f1": 0.6169354838709676,
1006
+ "eval_allNLI-dev_euclidean_f1_threshold": 25.579940795898438,
1007
+ "eval_allNLI-dev_euclidean_precision": 0.47368421052631576,
1008
+ "eval_allNLI-dev_euclidean_recall": 0.884393063583815,
1009
+ "eval_allNLI-dev_manhattan_accuracy": 0.71484375,
1010
+ "eval_allNLI-dev_manhattan_accuracy_threshold": 673.708251953125,
1011
+ "eval_allNLI-dev_manhattan_ap": 0.6078798861215969,
1012
+ "eval_allNLI-dev_manhattan_f1": 0.6170212765957447,
1013
+ "eval_allNLI-dev_manhattan_f1_threshold": 779.1580200195312,
1014
+ "eval_allNLI-dev_manhattan_precision": 0.4882154882154882,
1015
+ "eval_allNLI-dev_manhattan_recall": 0.838150289017341,
1016
+ "eval_allNLI-dev_max_accuracy": 0.72265625,
1017
+ "eval_allNLI-dev_max_accuracy_threshold": 701.8555908203125,
1018
+ "eval_allNLI-dev_max_ap": 0.608903927832523,
1019
+ "eval_allNLI-dev_max_f1": 0.6211764705882353,
1020
+ "eval_allNLI-dev_max_f1_threshold": 779.1580200195312,
1021
+ "eval_allNLI-dev_max_precision": 0.5238095238095238,
1022
+ "eval_allNLI-dev_max_recall": 0.884393063583815,
1023
+ "eval_sequential_score": 0.7578518420666434,
1024
+ "eval_sts-test_pearson_cosine": 0.9085589003585355,
1025
+ "eval_sts-test_pearson_dot": 0.8934066815875845,
1026
+ "eval_sts-test_pearson_euclidean": 0.9132129682245754,
1027
+ "eval_sts-test_pearson_manhattan": 0.9127682865746231,
1028
+ "eval_sts-test_pearson_max": 0.9132129682245754,
1029
+ "eval_sts-test_spearman_cosine": 0.9109138499261769,
1030
+ "eval_sts-test_spearman_dot": 0.8920668114399275,
1031
+ "eval_sts-test_spearman_euclidean": 0.912336089764457,
1032
+ "eval_sts-test_spearman_manhattan": 0.9119349842086059,
1033
+ "eval_sts-test_spearman_max": 0.912336089764457,
1034
+ "eval_vitaminc-pairs_loss": 2.359689474105835,
1035
+ "eval_vitaminc-pairs_runtime": 4.4921,
1036
+ "eval_vitaminc-pairs_samples_per_second": 28.494,
1037
+ "eval_vitaminc-pairs_steps_per_second": 0.223,
1038
+ "step": 485
1039
+ },
1040
+ {
1041
+ "epoch": 0.25116519937856036,
1042
+ "eval_negation-triplets_loss": 0.6828347444534302,
1043
+ "eval_negation-triplets_runtime": 3.0767,
1044
+ "eval_negation-triplets_samples_per_second": 41.603,
1045
+ "eval_negation-triplets_steps_per_second": 0.325,
1046
+ "step": 485
1047
+ },
1048
+ {
1049
+ "epoch": 0.25116519937856036,
1050
+ "eval_scitail-pairs-pos_loss": 0.020232411101460457,
1051
+ "eval_scitail-pairs-pos_runtime": 2.6103,
1052
+ "eval_scitail-pairs-pos_samples_per_second": 49.036,
1053
+ "eval_scitail-pairs-pos_steps_per_second": 0.383,
1054
+ "step": 485
1055
+ },
1056
+ {
1057
+ "epoch": 0.25116519937856036,
1058
+ "eval_scitail-pairs-qa_loss": 0.0017561395652592182,
1059
+ "eval_scitail-pairs-qa_runtime": 2.2409,
1060
+ "eval_scitail-pairs-qa_samples_per_second": 57.12,
1061
+ "eval_scitail-pairs-qa_steps_per_second": 0.446,
1062
+ "step": 485
1063
+ },
1064
+ {
1065
+ "epoch": 0.25116519937856036,
1066
+ "eval_xsum-pairs_loss": 0.009539155289530754,
1067
+ "eval_xsum-pairs_runtime": 3.0343,
1068
+ "eval_xsum-pairs_samples_per_second": 42.184,
1069
+ "eval_xsum-pairs_steps_per_second": 0.33,
1070
+ "step": 485
1071
+ },
1072
+ {
1073
+ "epoch": 0.25116519937856036,
1074
+ "eval_sciq_pairs_loss": 0.07515428215265274,
1075
+ "eval_sciq_pairs_runtime": 3.8288,
1076
+ "eval_sciq_pairs_samples_per_second": 33.431,
1077
+ "eval_sciq_pairs_steps_per_second": 0.261,
1078
+ "step": 485
1079
+ },
1080
+ {
1081
+ "epoch": 0.25116519937856036,
1082
+ "eval_qasc_pairs_loss": 0.16715534031391144,
1083
+ "eval_qasc_pairs_runtime": 2.0736,
1084
+ "eval_qasc_pairs_samples_per_second": 61.729,
1085
+ "eval_qasc_pairs_steps_per_second": 0.482,
1086
+ "step": 485
1087
+ },
1088
+ {
1089
+ "epoch": 0.25116519937856036,
1090
+ "eval_openbookqa_pairs_loss": 0.5365710854530334,
1091
+ "eval_openbookqa_pairs_runtime": 2.2749,
1092
+ "eval_openbookqa_pairs_samples_per_second": 56.267,
1093
+ "eval_openbookqa_pairs_steps_per_second": 0.44,
1094
+ "step": 485
1095
+ },
1096
+ {
1097
+ "epoch": 0.25116519937856036,
1098
+ "eval_msmarco_pairs_loss": 0.183290034532547,
1099
+ "eval_msmarco_pairs_runtime": 2.2376,
1100
+ "eval_msmarco_pairs_samples_per_second": 57.204,
1101
+ "eval_msmarco_pairs_steps_per_second": 0.447,
1102
+ "step": 485
1103
+ },
1104
+ {
1105
+ "epoch": 0.25116519937856036,
1106
+ "eval_nq_pairs_loss": 0.13633984327316284,
1107
+ "eval_nq_pairs_runtime": 2.7168,
1108
+ "eval_nq_pairs_samples_per_second": 47.115,
1109
+ "eval_nq_pairs_steps_per_second": 0.368,
1110
+ "step": 485
1111
+ },
1112
+ {
1113
+ "epoch": 0.25116519937856036,
1114
+ "eval_trivia_pairs_loss": 0.13907591998577118,
1115
+ "eval_trivia_pairs_runtime": 3.7638,
1116
+ "eval_trivia_pairs_samples_per_second": 34.008,
1117
+ "eval_trivia_pairs_steps_per_second": 0.266,
1118
+ "step": 485
1119
+ },
1120
+ {
1121
+ "epoch": 0.25116519937856036,
1122
+ "eval_gooaq_pairs_loss": 0.15382522344589233,
1123
+ "eval_gooaq_pairs_runtime": 2.1349,
1124
+ "eval_gooaq_pairs_samples_per_second": 59.955,
1125
+ "eval_gooaq_pairs_steps_per_second": 0.468,
1126
+ "step": 485
1127
+ },
1128
+ {
1129
+ "epoch": 0.25116519937856036,
1130
+ "eval_paws-pos_loss": 0.02764580212533474,
1131
+ "eval_paws-pos_runtime": 2.299,
1132
+ "eval_paws-pos_samples_per_second": 55.676,
1133
+ "eval_paws-pos_steps_per_second": 0.435,
1134
+ "step": 485
1135
+ },
1136
+ {
1137
+ "epoch": 0.25116519937856036,
1138
+ "eval_global_dataset_loss": 0.26656147837638855,
1139
+ "eval_global_dataset_runtime": 10.0817,
1140
+ "eval_global_dataset_samples_per_second": 41.263,
1141
+ "eval_global_dataset_steps_per_second": 0.397,
1142
+ "step": 485
1143
+ },
1144
+ {
1145
+ "epoch": 0.2589331952356292,
1146
+ "grad_norm": 8.639649391174316,
1147
+ "learning_rate": 1.1529411764705883e-05,
1148
+ "loss": 0.1781,
1149
+ "step": 500
1150
+ },
1151
+ {
1152
+ "epoch": 0.26929052304505435,
1153
+ "grad_norm": 14.192313194274902,
1154
+ "learning_rate": 1.2e-05,
1155
+ "loss": 0.2177,
1156
+ "step": 520
1157
+ },
1158
+ {
1159
+ "epoch": 0.2796478508544795,
1160
+ "grad_norm": 0.47864726185798645,
1161
+ "learning_rate": 1.2470588235294119e-05,
1162
+ "loss": 0.5771,
1163
+ "step": 540
1164
+ },
1165
+ {
1166
+ "epoch": 0.29000517866390474,
1167
+ "grad_norm": 94.00303649902344,
1168
+ "learning_rate": 1.291764705882353e-05,
1169
+ "loss": 2.2303,
1170
+ "step": 560
1171
+ },
1172
+ {
1173
+ "epoch": 0.3003625064733299,
1174
+ "grad_norm": 284.8737487792969,
1175
+ "learning_rate": 1.3364705882352942e-05,
1176
+ "loss": 1.0045,
1177
+ "step": 580
1178
+ },
1179
+ {
1180
+ "epoch": 0.3013982392542724,
1181
+ "eval_Qnli-dev_cosine_accuracy": 0.6953125,
1182
+ "eval_Qnli-dev_cosine_accuracy_threshold": 0.8107659816741943,
1183
+ "eval_Qnli-dev_cosine_ap": 0.7306763373744987,
1184
+ "eval_Qnli-dev_cosine_f1": 0.6955074875207986,
1185
+ "eval_Qnli-dev_cosine_f1_threshold": 0.7081253528594971,
1186
+ "eval_Qnli-dev_cosine_precision": 0.5726027397260274,
1187
+ "eval_Qnli-dev_cosine_recall": 0.885593220338983,
1188
+ "eval_Qnli-dev_dot_accuracy": 0.671875,
1189
+ "eval_Qnli-dev_dot_accuracy_threshold": 875.9421997070312,
1190
+ "eval_Qnli-dev_dot_ap": 0.6876970673529026,
1191
+ "eval_Qnli-dev_dot_f1": 0.6821192052980133,
1192
+ "eval_Qnli-dev_dot_f1_threshold": 786.2505493164062,
1193
+ "eval_Qnli-dev_dot_precision": 0.5597826086956522,
1194
+ "eval_Qnli-dev_dot_recall": 0.8728813559322034,
1195
+ "eval_Qnli-dev_euclidean_accuracy": 0.69921875,
1196
+ "eval_Qnli-dev_euclidean_accuracy_threshold": 21.132396697998047,
1197
+ "eval_Qnli-dev_euclidean_ap": 0.7325235937497143,
1198
+ "eval_Qnli-dev_euclidean_f1": 0.6955074875207986,
1199
+ "eval_Qnli-dev_euclidean_f1_threshold": 25.694360733032227,
1200
+ "eval_Qnli-dev_euclidean_precision": 0.5726027397260274,
1201
+ "eval_Qnli-dev_euclidean_recall": 0.885593220338983,
1202
+ "eval_Qnli-dev_manhattan_accuracy": 0.701171875,
1203
+ "eval_Qnli-dev_manhattan_accuracy_threshold": 650.69775390625,
1204
+ "eval_Qnli-dev_manhattan_ap": 0.73351057649253,
1205
+ "eval_Qnli-dev_manhattan_f1": 0.6923076923076924,
1206
+ "eval_Qnli-dev_manhattan_f1_threshold": 790.9528198242188,
1207
+ "eval_Qnli-dev_manhattan_precision": 0.5718232044198895,
1208
+ "eval_Qnli-dev_manhattan_recall": 0.8771186440677966,
1209
+ "eval_Qnli-dev_max_accuracy": 0.701171875,
1210
+ "eval_Qnli-dev_max_accuracy_threshold": 875.9421997070312,
1211
+ "eval_Qnli-dev_max_ap": 0.73351057649253,
1212
+ "eval_Qnli-dev_max_f1": 0.6955074875207986,
1213
+ "eval_Qnli-dev_max_f1_threshold": 790.9528198242188,
1214
+ "eval_Qnli-dev_max_precision": 0.5726027397260274,
1215
+ "eval_Qnli-dev_max_recall": 0.885593220338983,
1216
+ "eval_allNLI-dev_cosine_accuracy": 0.71875,
1217
+ "eval_allNLI-dev_cosine_accuracy_threshold": 0.8345531225204468,
1218
+ "eval_allNLI-dev_cosine_ap": 0.5833256810054208,
1219
+ "eval_allNLI-dev_cosine_f1": 0.6093366093366094,
1220
+ "eval_allNLI-dev_cosine_f1_threshold": 0.7519584894180298,
1221
+ "eval_allNLI-dev_cosine_precision": 0.5299145299145299,
1222
+ "eval_allNLI-dev_cosine_recall": 0.7167630057803468,
1223
+ "eval_allNLI-dev_dot_accuracy": 0.71875,
1224
+ "eval_allNLI-dev_dot_accuracy_threshold": 932.83544921875,
1225
+ "eval_allNLI-dev_dot_ap": 0.5730668161963208,
1226
+ "eval_allNLI-dev_dot_f1": 0.6140350877192983,
1227
+ "eval_allNLI-dev_dot_f1_threshold": 790.3121337890625,
1228
+ "eval_allNLI-dev_dot_precision": 0.49469964664310956,
1229
+ "eval_allNLI-dev_dot_recall": 0.8092485549132948,
1230
+ "eval_allNLI-dev_euclidean_accuracy": 0.712890625,
1231
+ "eval_allNLI-dev_euclidean_accuracy_threshold": 16.462337493896484,
1232
+ "eval_allNLI-dev_euclidean_ap": 0.5830290393354319,
1233
+ "eval_allNLI-dev_euclidean_f1": 0.6080760095011876,
1234
+ "eval_allNLI-dev_euclidean_f1_threshold": 23.817108154296875,
1235
+ "eval_allNLI-dev_euclidean_precision": 0.5161290322580645,
1236
+ "eval_allNLI-dev_euclidean_recall": 0.7398843930635838,
1237
+ "eval_allNLI-dev_manhattan_accuracy": 0.71484375,
1238
+ "eval_allNLI-dev_manhattan_accuracy_threshold": 514.4776611328125,
1239
+ "eval_allNLI-dev_manhattan_ap": 0.5824345257218883,
1240
+ "eval_allNLI-dev_manhattan_f1": 0.6029411764705882,
1241
+ "eval_allNLI-dev_manhattan_f1_threshold": 725.9110717773438,
1242
+ "eval_allNLI-dev_manhattan_precision": 0.5234042553191489,
1243
+ "eval_allNLI-dev_manhattan_recall": 0.7109826589595376,
1244
+ "eval_allNLI-dev_max_accuracy": 0.71875,
1245
+ "eval_allNLI-dev_max_accuracy_threshold": 932.83544921875,
1246
+ "eval_allNLI-dev_max_ap": 0.5833256810054208,
1247
+ "eval_allNLI-dev_max_f1": 0.6140350877192983,
1248
+ "eval_allNLI-dev_max_f1_threshold": 790.3121337890625,
1249
+ "eval_allNLI-dev_max_precision": 0.5299145299145299,
1250
+ "eval_allNLI-dev_max_recall": 0.8092485549132948,
1251
+ "eval_sequential_score": 0.73351057649253,
1252
+ "eval_sts-test_pearson_cosine": 0.911958388742002,
1253
+ "eval_sts-test_pearson_dot": 0.8881053452310657,
1254
+ "eval_sts-test_pearson_euclidean": 0.9250703199093523,
1255
+ "eval_sts-test_pearson_manhattan": 0.9254282934479543,
1256
+ "eval_sts-test_pearson_max": 0.9254282934479543,
1257
+ "eval_sts-test_spearman_cosine": 0.9182240579769849,
1258
+ "eval_sts-test_spearman_dot": 0.8777027753148232,
1259
+ "eval_sts-test_spearman_euclidean": 0.9183138737585973,
1260
+ "eval_sts-test_spearman_manhattan": 0.9189913183535404,
1261
+ "eval_sts-test_spearman_max": 0.9189913183535404,
1262
+ "eval_vitaminc-pairs_loss": 3.1416079998016357,
1263
+ "eval_vitaminc-pairs_runtime": 4.5,
1264
+ "eval_vitaminc-pairs_samples_per_second": 28.445,
1265
+ "eval_vitaminc-pairs_steps_per_second": 0.222,
1266
+ "step": 582
1267
+ },
1268
+ {
1269
+ "epoch": 0.3013982392542724,
1270
+ "eval_negation-triplets_loss": 0.7199142575263977,
1271
+ "eval_negation-triplets_runtime": 3.1016,
1272
+ "eval_negation-triplets_samples_per_second": 41.27,
1273
+ "eval_negation-triplets_steps_per_second": 0.322,
1274
+ "step": 582
1275
+ },
1276
+ {
1277
+ "epoch": 0.3013982392542724,
1278
+ "eval_scitail-pairs-pos_loss": 0.020503610372543335,
1279
+ "eval_scitail-pairs-pos_runtime": 2.6474,
1280
+ "eval_scitail-pairs-pos_samples_per_second": 48.35,
1281
+ "eval_scitail-pairs-pos_steps_per_second": 0.378,
1282
+ "step": 582
1283
+ },
1284
+ {
1285
+ "epoch": 0.3013982392542724,
1286
+ "eval_scitail-pairs-qa_loss": 0.014284193515777588,
1287
+ "eval_scitail-pairs-qa_runtime": 2.2281,
1288
+ "eval_scitail-pairs-qa_samples_per_second": 57.447,
1289
+ "eval_scitail-pairs-qa_steps_per_second": 0.449,
1290
+ "step": 582
1291
+ },
1292
+ {
1293
+ "epoch": 0.3013982392542724,
1294
+ "eval_xsum-pairs_loss": 0.020332960411906242,
1295
+ "eval_xsum-pairs_runtime": 3.0646,
1296
+ "eval_xsum-pairs_samples_per_second": 41.767,
1297
+ "eval_xsum-pairs_steps_per_second": 0.326,
1298
+ "step": 582
1299
+ },
1300
+ {
1301
+ "epoch": 0.3013982392542724,
1302
+ "eval_sciq_pairs_loss": 0.07365372776985168,
1303
+ "eval_sciq_pairs_runtime": 3.9037,
1304
+ "eval_sciq_pairs_samples_per_second": 32.789,
1305
+ "eval_sciq_pairs_steps_per_second": 0.256,
1306
+ "step": 582
1307
+ },
1308
+ {
1309
+ "epoch": 0.3013982392542724,
1310
+ "eval_qasc_pairs_loss": 0.9374014735221863,
1311
+ "eval_qasc_pairs_runtime": 2.0898,
1312
+ "eval_qasc_pairs_samples_per_second": 61.249,
1313
+ "eval_qasc_pairs_steps_per_second": 0.479,
1314
+ "step": 582
1315
+ },
1316
+ {
1317
+ "epoch": 0.3013982392542724,
1318
+ "eval_openbookqa_pairs_loss": 0.5403007864952087,
1319
+ "eval_openbookqa_pairs_runtime": 2.1959,
1320
+ "eval_openbookqa_pairs_samples_per_second": 58.291,
1321
+ "eval_openbookqa_pairs_steps_per_second": 0.455,
1322
+ "step": 582
1323
+ },
1324
+ {
1325
+ "epoch": 0.3013982392542724,
1326
+ "eval_msmarco_pairs_loss": 2.8998327255249023,
1327
+ "eval_msmarco_pairs_runtime": 2.2455,
1328
+ "eval_msmarco_pairs_samples_per_second": 57.004,
1329
+ "eval_msmarco_pairs_steps_per_second": 0.445,
1330
+ "step": 582
1331
+ },
1332
+ {
1333
+ "epoch": 0.3013982392542724,
1334
+ "eval_nq_pairs_loss": 0.1996317207813263,
1335
+ "eval_nq_pairs_runtime": 2.7271,
1336
+ "eval_nq_pairs_samples_per_second": 46.936,
1337
+ "eval_nq_pairs_steps_per_second": 0.367,
1338
+ "step": 582
1339
+ },
1340
+ {
1341
+ "epoch": 0.3013982392542724,
1342
+ "eval_trivia_pairs_loss": 0.16345469653606415,
1343
+ "eval_trivia_pairs_runtime": 3.7445,
1344
+ "eval_trivia_pairs_samples_per_second": 34.183,
1345
+ "eval_trivia_pairs_steps_per_second": 0.267,
1346
+ "step": 582
1347
+ },
1348
+ {
1349
+ "epoch": 0.3013982392542724,
1350
+ "eval_gooaq_pairs_loss": 1.6607106924057007,
1351
+ "eval_gooaq_pairs_runtime": 2.1231,
1352
+ "eval_gooaq_pairs_samples_per_second": 60.289,
1353
+ "eval_gooaq_pairs_steps_per_second": 0.471,
1354
+ "step": 582
1355
+ },
1356
+ {
1357
+ "epoch": 0.3013982392542724,
1358
+ "eval_paws-pos_loss": 0.026082171127200127,
1359
+ "eval_paws-pos_runtime": 2.2328,
1360
+ "eval_paws-pos_samples_per_second": 57.327,
1361
+ "eval_paws-pos_steps_per_second": 0.448,
1362
+ "step": 582
1363
+ },
1364
+ {
1365
+ "epoch": 0.3013982392542724,
1366
+ "eval_global_dataset_loss": 0.4393865168094635,
1367
+ "eval_global_dataset_runtime": 10.0711,
1368
+ "eval_global_dataset_samples_per_second": 41.307,
1369
+ "eval_global_dataset_steps_per_second": 0.397,
1370
+ "step": 582
1371
+ },
1372
+ {
1373
+ "epoch": 0.31071983428275507,
1374
+ "grad_norm": 2.5726535320281982,
1375
+ "learning_rate": 1.3835294117647059e-05,
1376
+ "loss": 0.5632,
1377
+ "step": 600
1378
+ },
1379
+ {
1380
+ "epoch": 0.32107716209218023,
1381
+ "grad_norm": 65.14546966552734,
1382
+ "learning_rate": 1.4305882352941177e-05,
1383
+ "loss": 0.2533,
1384
+ "step": 620
1385
+ },
1386
+ {
1387
+ "epoch": 0.3314344899016054,
1388
+ "grad_norm": 4.207058906555176,
1389
+ "learning_rate": 1.4776470588235294e-05,
1390
+ "loss": 0.2559,
1391
+ "step": 640
1392
+ },
1393
+ {
1394
+ "epoch": 0.34179181771103057,
1395
+ "grad_norm": 278.2864990234375,
1396
+ "learning_rate": 1.5223529411764707e-05,
1397
+ "loss": 0.2664,
1398
+ "step": 660
1399
+ },
1400
+ {
1401
+ "epoch": 0.3516312791299845,
1402
+ "eval_Qnli-dev_cosine_accuracy": 0.69140625,
1403
+ "eval_Qnli-dev_cosine_accuracy_threshold": 0.677927553653717,
1404
+ "eval_Qnli-dev_cosine_ap": 0.7135273513673487,
1405
+ "eval_Qnli-dev_cosine_f1": 0.7079037800687284,
1406
+ "eval_Qnli-dev_cosine_f1_threshold": 0.6066854000091553,
1407
+ "eval_Qnli-dev_cosine_precision": 0.5953757225433526,
1408
+ "eval_Qnli-dev_cosine_recall": 0.8728813559322034,
1409
+ "eval_Qnli-dev_dot_accuracy": 0.669921875,
1410
+ "eval_Qnli-dev_dot_accuracy_threshold": 615.3697509765625,
1411
+ "eval_Qnli-dev_dot_ap": 0.6697900654196967,
1412
+ "eval_Qnli-dev_dot_f1": 0.6946308724832215,
1413
+ "eval_Qnli-dev_dot_f1_threshold": 583.5701293945312,
1414
+ "eval_Qnli-dev_dot_precision": 0.575,
1415
+ "eval_Qnli-dev_dot_recall": 0.8771186440677966,
1416
+ "eval_Qnli-dev_euclidean_accuracy": 0.70703125,
1417
+ "eval_Qnli-dev_euclidean_accuracy_threshold": 24.364826202392578,
1418
+ "eval_Qnli-dev_euclidean_ap": 0.720573540901372,
1419
+ "eval_Qnli-dev_euclidean_f1": 0.7003367003367004,
1420
+ "eval_Qnli-dev_euclidean_f1_threshold": 28.203102111816406,
1421
+ "eval_Qnli-dev_euclidean_precision": 0.5810055865921788,
1422
+ "eval_Qnli-dev_euclidean_recall": 0.8813559322033898,
1423
+ "eval_Qnli-dev_manhattan_accuracy": 0.705078125,
1424
+ "eval_Qnli-dev_manhattan_accuracy_threshold": 754.4404296875,
1425
+ "eval_Qnli-dev_manhattan_ap": 0.7236352677013607,
1426
+ "eval_Qnli-dev_manhattan_f1": 0.701168614357262,
1427
+ "eval_Qnli-dev_manhattan_f1_threshold": 882.8988037109375,
1428
+ "eval_Qnli-dev_manhattan_precision": 0.5785123966942148,
1429
+ "eval_Qnli-dev_manhattan_recall": 0.8898305084745762,
1430
+ "eval_Qnli-dev_max_accuracy": 0.70703125,
1431
+ "eval_Qnli-dev_max_accuracy_threshold": 754.4404296875,
1432
+ "eval_Qnli-dev_max_ap": 0.7236352677013607,
1433
+ "eval_Qnli-dev_max_f1": 0.7079037800687284,
1434
+ "eval_Qnli-dev_max_f1_threshold": 882.8988037109375,
1435
+ "eval_Qnli-dev_max_precision": 0.5953757225433526,
1436
+ "eval_Qnli-dev_max_recall": 0.8898305084745762,
1437
+ "eval_allNLI-dev_cosine_accuracy": 0.7109375,
1438
+ "eval_allNLI-dev_cosine_accuracy_threshold": 0.8685251474380493,
1439
+ "eval_allNLI-dev_cosine_ap": 0.5715847909509861,
1440
+ "eval_allNLI-dev_cosine_f1": 0.5822222222222222,
1441
+ "eval_allNLI-dev_cosine_f1_threshold": 0.6784489154815674,
1442
+ "eval_allNLI-dev_cosine_precision": 0.4729241877256318,
1443
+ "eval_allNLI-dev_cosine_recall": 0.7572254335260116,
1444
+ "eval_allNLI-dev_dot_accuracy": 0.705078125,
1445
+ "eval_allNLI-dev_dot_accuracy_threshold": 886.351318359375,
1446
+ "eval_allNLI-dev_dot_ap": 0.5644148142455192,
1447
+ "eval_allNLI-dev_dot_f1": 0.5940170940170941,
1448
+ "eval_allNLI-dev_dot_f1_threshold": 659.7857666015625,
1449
+ "eval_allNLI-dev_dot_precision": 0.4711864406779661,
1450
+ "eval_allNLI-dev_dot_recall": 0.8034682080924855,
1451
+ "eval_allNLI-dev_euclidean_accuracy": 0.708984375,
1452
+ "eval_allNLI-dev_euclidean_accuracy_threshold": 15.756305694580078,
1453
+ "eval_allNLI-dev_euclidean_ap": 0.5670650775218166,
1454
+ "eval_allNLI-dev_euclidean_f1": 0.5840000000000001,
1455
+ "eval_allNLI-dev_euclidean_f1_threshold": 26.798587799072266,
1456
+ "eval_allNLI-dev_euclidean_precision": 0.44648318042813456,
1457
+ "eval_allNLI-dev_euclidean_recall": 0.8439306358381503,
1458
+ "eval_allNLI-dev_manhattan_accuracy": 0.712890625,
1459
+ "eval_allNLI-dev_manhattan_accuracy_threshold": 551.571533203125,
1460
+ "eval_allNLI-dev_manhattan_ap": 0.5708799354607844,
1461
+ "eval_allNLI-dev_manhattan_f1": 0.5863453815261045,
1462
+ "eval_allNLI-dev_manhattan_f1_threshold": 831.1856689453125,
1463
+ "eval_allNLI-dev_manhattan_precision": 0.4492307692307692,
1464
+ "eval_allNLI-dev_manhattan_recall": 0.8439306358381503,
1465
+ "eval_allNLI-dev_max_accuracy": 0.712890625,
1466
+ "eval_allNLI-dev_max_accuracy_threshold": 886.351318359375,
1467
+ "eval_allNLI-dev_max_ap": 0.5715847909509861,
1468
+ "eval_allNLI-dev_max_f1": 0.5940170940170941,
1469
+ "eval_allNLI-dev_max_f1_threshold": 831.1856689453125,
1470
+ "eval_allNLI-dev_max_precision": 0.4729241877256318,
1471
+ "eval_allNLI-dev_max_recall": 0.8439306358381503,
1472
+ "eval_sequential_score": 0.7236352677013607,
1473
+ "eval_sts-test_pearson_cosine": 0.9021470099112117,
1474
+ "eval_sts-test_pearson_dot": 0.8917251782363848,
1475
+ "eval_sts-test_pearson_euclidean": 0.9135854372994908,
1476
+ "eval_sts-test_pearson_manhattan": 0.9142235212520329,
1477
+ "eval_sts-test_pearson_max": 0.9142235212520329,
1478
+ "eval_sts-test_spearman_cosine": 0.907700664607013,
1479
+ "eval_sts-test_spearman_dot": 0.8887621545424798,
1480
+ "eval_sts-test_spearman_euclidean": 0.9072292995707775,
1481
+ "eval_sts-test_spearman_manhattan": 0.9078543851898925,
1482
+ "eval_sts-test_spearman_max": 0.9078543851898925,
1483
+ "eval_vitaminc-pairs_loss": 2.5545308589935303,
1484
+ "eval_vitaminc-pairs_runtime": 4.4484,
1485
+ "eval_vitaminc-pairs_samples_per_second": 28.775,
1486
+ "eval_vitaminc-pairs_steps_per_second": 0.225,
1487
+ "step": 679
1488
+ },
1489
+ {
1490
+ "epoch": 0.3516312791299845,
1491
+ "eval_negation-triplets_loss": 0.792955756187439,
1492
+ "eval_negation-triplets_runtime": 3.0828,
1493
+ "eval_negation-triplets_samples_per_second": 41.521,
1494
+ "eval_negation-triplets_steps_per_second": 0.324,
1495
+ "step": 679
1496
+ },
1497
+ {
1498
+ "epoch": 0.3516312791299845,
1499
+ "eval_scitail-pairs-pos_loss": 0.011221353895962238,
1500
+ "eval_scitail-pairs-pos_runtime": 2.6324,
1501
+ "eval_scitail-pairs-pos_samples_per_second": 48.624,
1502
+ "eval_scitail-pairs-pos_steps_per_second": 0.38,
1503
+ "step": 679
1504
+ },
1505
+ {
1506
+ "epoch": 0.3516312791299845,
1507
+ "eval_scitail-pairs-qa_loss": 0.008626868017017841,
1508
+ "eval_scitail-pairs-qa_runtime": 2.1929,
1509
+ "eval_scitail-pairs-qa_samples_per_second": 58.369,
1510
+ "eval_scitail-pairs-qa_steps_per_second": 0.456,
1511
+ "step": 679
1512
+ },
1513
+ {
1514
+ "epoch": 0.3516312791299845,
1515
+ "eval_xsum-pairs_loss": 0.004584914073348045,
1516
+ "eval_xsum-pairs_runtime": 3.0494,
1517
+ "eval_xsum-pairs_samples_per_second": 41.975,
1518
+ "eval_xsum-pairs_steps_per_second": 0.328,
1519
+ "step": 679
1520
+ },
1521
+ {
1522
+ "epoch": 0.3516312791299845,
1523
+ "eval_sciq_pairs_loss": 0.07755079865455627,
1524
+ "eval_sciq_pairs_runtime": 3.8446,
1525
+ "eval_sciq_pairs_samples_per_second": 33.294,
1526
+ "eval_sciq_pairs_steps_per_second": 0.26,
1527
+ "step": 679
1528
+ },
1529
+ {
1530
+ "epoch": 0.3516312791299845,
1531
+ "eval_qasc_pairs_loss": 0.2504812777042389,
1532
+ "eval_qasc_pairs_runtime": 2.0696,
1533
+ "eval_qasc_pairs_samples_per_second": 61.849,
1534
+ "eval_qasc_pairs_steps_per_second": 0.483,
1535
+ "step": 679
1536
+ },
1537
+ {
1538
+ "epoch": 0.3516312791299845,
1539
+ "eval_openbookqa_pairs_loss": 0.615034818649292,
1540
+ "eval_openbookqa_pairs_runtime": 2.1876,
1541
+ "eval_openbookqa_pairs_samples_per_second": 58.512,
1542
+ "eval_openbookqa_pairs_steps_per_second": 0.457,
1543
+ "step": 679
1544
+ },
1545
+ {
1546
+ "epoch": 0.3516312791299845,
1547
+ "eval_msmarco_pairs_loss": 0.12970499694347382,
1548
+ "eval_msmarco_pairs_runtime": 2.2547,
1549
+ "eval_msmarco_pairs_samples_per_second": 56.771,
1550
+ "eval_msmarco_pairs_steps_per_second": 0.444,
1551
+ "step": 679
1552
+ },
1553
+ {
1554
+ "epoch": 0.3516312791299845,
1555
+ "eval_nq_pairs_loss": 0.11113697290420532,
1556
+ "eval_nq_pairs_runtime": 2.7434,
1557
+ "eval_nq_pairs_samples_per_second": 46.658,
1558
+ "eval_nq_pairs_steps_per_second": 0.365,
1559
+ "step": 679
1560
+ },
1561
+ {
1562
+ "epoch": 0.3516312791299845,
1563
+ "eval_trivia_pairs_loss": 0.13746751844882965,
1564
+ "eval_trivia_pairs_runtime": 3.7502,
1565
+ "eval_trivia_pairs_samples_per_second": 34.131,
1566
+ "eval_trivia_pairs_steps_per_second": 0.267,
1567
+ "step": 679
1568
+ },
1569
+ {
1570
+ "epoch": 0.3516312791299845,
1571
+ "eval_gooaq_pairs_loss": 0.15882055461406708,
1572
+ "eval_gooaq_pairs_runtime": 2.1544,
1573
+ "eval_gooaq_pairs_samples_per_second": 59.413,
1574
+ "eval_gooaq_pairs_steps_per_second": 0.464,
1575
+ "step": 679
1576
+ },
1577
+ {
1578
+ "epoch": 0.3516312791299845,
1579
+ "eval_paws-pos_loss": 0.024272503331303596,
1580
+ "eval_paws-pos_runtime": 2.2216,
1581
+ "eval_paws-pos_samples_per_second": 57.616,
1582
+ "eval_paws-pos_steps_per_second": 0.45,
1583
+ "step": 679
1584
+ },
1585
+ {
1586
+ "epoch": 0.3516312791299845,
1587
+ "eval_global_dataset_loss": 0.19072183966636658,
1588
+ "eval_global_dataset_runtime": 10.0803,
1589
+ "eval_global_dataset_samples_per_second": 41.269,
1590
+ "eval_global_dataset_steps_per_second": 0.397,
1591
+ "step": 679
1592
+ },
1593
+ {
1594
+ "epoch": 0.35214914552045573,
1595
+ "grad_norm": 13.81802749633789,
1596
+ "learning_rate": 1.5694117647058825e-05,
1597
+ "loss": 0.2108,
1598
+ "step": 680
1599
+ },
1600
+ {
1601
+ "epoch": 0.3625064733298809,
1602
+ "grad_norm": 1107.8421630859375,
1603
+ "learning_rate": 1.6164705882352942e-05,
1604
+ "loss": 0.2936,
1605
+ "step": 700
1606
+ },
1607
+ {
1608
+ "epoch": 0.37286380113930606,
1609
+ "grad_norm": 9.161267280578613,
1610
+ "learning_rate": 1.6635294117647062e-05,
1611
+ "loss": 1.13,
1612
+ "step": 720
1613
+ },
1614
+ {
1615
+ "epoch": 0.3832211289487312,
1616
+ "grad_norm": 523.1478881835938,
1617
+ "learning_rate": 1.7105882352941176e-05,
1618
+ "loss": 0.2598,
1619
+ "step": 740
1620
+ },
1621
+ {
1622
+ "epoch": 0.3935784567581564,
1623
+ "grad_norm": 0.9771941304206848,
1624
+ "learning_rate": 1.7576470588235296e-05,
1625
+ "loss": 0.1599,
1626
+ "step": 760
1627
+ }
1628
+ ],
1629
+ "logging_steps": 20,
1630
+ "max_steps": 3862,
1631
+ "num_input_tokens_seen": 0,
1632
+ "num_train_epochs": 2,
1633
+ "save_steps": 387,
1634
+ "stateful_callbacks": {
1635
+ "TrainerControl": {
1636
+ "args": {
1637
+ "should_epoch_stop": false,
1638
+ "should_evaluate": false,
1639
+ "should_log": false,
1640
+ "should_save": true,
1641
+ "should_training_stop": false
1642
+ },
1643
+ "attributes": {}
1644
+ }
1645
+ },
1646
+ "total_flos": 0.0,
1647
+ "train_batch_size": 64,
1648
+ "trial_name": null,
1649
+ "trial_params": null
1650
+ }
checkpoint-774/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6afbd3ad35d0a5a9ae1e51fcec7df790b982c51eacf41bfffb4163061732d175
3
+ size 5560