saqidr commited on
Commit
b425aac
·
verified ·
1 Parent(s): f554fcc

Training in progress, step 500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4ed14a983a063b04b5c0e74f719ee953be8e1b478635360e23a0e81062435df
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a6b5a20caef3a60dbe25e51744780de8ee80e367ca8c79d235b3f1e28196b83
3
  size 268290900
run-4/checkpoint-1500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:417f67bfc777c996b35c8996ff62c52ec34d71e7a766e99077134c8e94a8f85b
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7e261c4133c5011993c01561d31449f1945278820742b1a10be67f815d8be65
3
  size 268290900
run-4/checkpoint-1500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa48a0c978c6a7ede2f05dfd7e1a4fdfa87e493dfd4351367eab5fe63f91142a
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a526f6972f40380fe67908c1a158679ea08f29595d4a7e0b940f91789649cca4
3
  size 536643898
run-4/checkpoint-1500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:55c8d3ce0734337fc0c187ca5543b4c70ca45d996531f199209b3a0c2a798109
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad4d7d251acf36e559c362893a1fb310c9f46b20e8a330025a14b6829ce4ab07
3
  size 1064
run-4/checkpoint-1500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:09c1d2b3517a81c25cfd3dc4720ca0f5c5cd50449810325441209751db6b15be
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:829aeb9a80fa5e890bae01359822ae021bf174b7265d13bb14d18d678935efbc
3
  size 5176
run-4/checkpoint-2000/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36e3328b8fb4a8714c24f51f7d4171f2ba6461c4b6d5913393b71021bff698b8
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c880fb670519c3b8fd3b38da45a1633e5f4534106637fb8a43cac51424b796d
3
  size 268290900
run-4/checkpoint-2000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19456eaaf80e44986281ea730601eeddcfe0e0c962af2e6358cfd3f16de6611c
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d154f7a2ab3f1fb62596a00e50a64b5436b8321a0325b989c9e2f26ab5c57f2
3
  size 536643898
run-4/checkpoint-2000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5613af56824b89d2251abc079d69f51d9497a128212db6eb459f4c957e78a29b
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37ccc14cc902672c3d8e770c2a2ce3a532b454981f3118cb18a09274102a99fc
3
  size 14244
run-4/checkpoint-2000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43d45918bfeb622ba899798234ec7646c7a90dd3f5771db086f5b7dee1d5a530
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f38866eaf1d2baeb52a55cb38ece6ee67f3213265b0830dd06c82a9148795bea
3
  size 1064
run-4/checkpoint-2000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5d5d2fff8a65a6470ff21893289269130aa490c201d7c1e86a6b03304d6d7dd8
3
- size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:829aeb9a80fa5e890bae01359822ae021bf174b7265d13bb14d18d678935efbc
3
+ size 5176
run-6/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:150019f90e35dcb4b7f49da2bd990bec1dc64463d39dc2e4c01e0cfc43efd14c
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a6b5a20caef3a60dbe25e51744780de8ee80e367ca8c79d235b3f1e28196b83
3
  size 268290900
run-6/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:036dc8d61a536755c36e04920350c04dda9f8748aacdee7400f4603af435cbd9
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33e2558a9017819f695c86a2d2a95801009d84d8ff9874e7b19f22dd924876eb
3
  size 536643898
run-6/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e1264523e958cf7990dc5f42d876cc12129475c4603804cf66868aaf25c2c24
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb78b1e790f49c38375a9a03add3c9373ef72cf0138709ffb96775535ef084c6
3
  size 1064
run-6/checkpoint-500/trainer_state.json CHANGED
@@ -10,25 +10,25 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.5832258064516129,
14
- "eval_loss": 0.21771539747714996,
15
- "eval_runtime": 1.3209,
16
- "eval_samples_per_second": 2346.936,
17
- "eval_steps_per_second": 49.21,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.5158940553665161,
23
- "learning_rate": 1.371069182389937e-05,
24
- "loss": 0.3408,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
- "max_steps": 1590,
30
  "num_input_tokens_seen": 0,
31
- "num_train_epochs": 5,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
@@ -42,12 +42,12 @@
42
  "attributes": {}
43
  }
44
  },
45
- "total_flos": 130072209152340.0,
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
- "alpha": 0.21209041164928688,
50
- "num_train_epochs": 5,
51
- "temperature": 8
52
  }
53
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.59,
14
+ "eval_loss": 0.21894077956676483,
15
+ "eval_runtime": 1.246,
16
+ "eval_samples_per_second": 2488.003,
17
+ "eval_steps_per_second": 52.168,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5261774063110352,
23
+ "learning_rate": 1.4758909853249476e-05,
24
+ "loss": 0.346,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
+ "max_steps": 1908,
30
  "num_input_tokens_seen": 0,
31
+ "num_train_epochs": 6,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
 
42
  "attributes": {}
43
  }
44
  },
45
+ "total_flos": 213076546446120.0,
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
+ "alpha": 0.2868213510898018,
50
+ "num_train_epochs": 6,
51
+ "temperature": 7
52
  }
53
  }
run-6/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03960be7937998204e7d886e253e2b4e234ef43d75169693d208fdea3e6c7062
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba6ee41bf2e97af70bb5c7bef1f94ebd5979e03ab5954577cc7dff4f754b42f8
3
  size 5176
run-8/checkpoint-1000/config.json CHANGED
@@ -326,6 +326,6 @@
326
  "sinusoidal_pos_embds": false,
327
  "tie_weights_": true,
328
  "torch_dtype": "float32",
329
- "transformers_version": "4.37.2",
330
  "vocab_size": 30522
331
  }
 
326
  "sinusoidal_pos_embds": false,
327
  "tie_weights_": true,
328
  "torch_dtype": "float32",
329
+ "transformers_version": "4.41.1",
330
  "vocab_size": 30522
331
  }
run-8/checkpoint-1000/tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
run-8/checkpoint-1000/trainer_state.json CHANGED
@@ -10,41 +10,43 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6048387096774194,
14
- "eval_loss": 0.193294957280159,
15
- "eval_runtime": 1.3978,
16
- "eval_samples_per_second": 2217.705,
17
- "eval_steps_per_second": 46.5,
18
  "step": 318
19
  },
20
  {
21
- "epoch": 1.57,
 
22
  "learning_rate": 1.685534591194969e-05,
23
- "loss": 0.3109,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.8212903225806452,
29
- "eval_loss": 0.09445521980524063,
30
- "eval_runtime": 1.3905,
31
- "eval_samples_per_second": 2229.493,
32
- "eval_steps_per_second": 46.747,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_accuracy": 0.8806451612903226,
38
- "eval_loss": 0.061732806265354156,
39
- "eval_runtime": 1.4039,
40
- "eval_samples_per_second": 2208.176,
41
- "eval_steps_per_second": 46.3,
42
  "step": 954
43
  },
44
  {
45
- "epoch": 3.14,
 
46
  "learning_rate": 1.371069182389937e-05,
47
- "loss": 0.1076,
48
  "step": 1000
49
  }
50
  ],
@@ -53,12 +55,24 @@
53
  "num_input_tokens_seen": 0,
54
  "num_train_epochs": 10,
55
  "save_steps": 500,
56
- "total_flos": 507156838101732.0,
 
 
 
 
 
 
 
 
 
 
 
 
57
  "train_batch_size": 48,
58
  "trial_name": null,
59
  "trial_params": {
60
- "alpha": 0.5027632612026453,
61
  "num_train_epochs": 10,
62
- "temperature": 16
63
  }
64
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6064516129032258,
14
+ "eval_loss": 0.2185761034488678,
15
+ "eval_runtime": 1.2573,
16
+ "eval_samples_per_second": 2465.677,
17
+ "eval_steps_per_second": 51.7,
18
  "step": 318
19
  },
20
  {
21
+ "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5516161918640137,
23
  "learning_rate": 1.685534591194969e-05,
24
+ "loss": 0.3519,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8412903225806452,
30
+ "eval_loss": 0.10163559764623642,
31
+ "eval_runtime": 1.2573,
32
+ "eval_samples_per_second": 2465.684,
33
+ "eval_steps_per_second": 51.7,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8858064516129033,
39
+ "eval_loss": 0.06496834009885788,
40
+ "eval_runtime": 1.2624,
41
+ "eval_samples_per_second": 2455.593,
42
+ "eval_steps_per_second": 51.488,
43
  "step": 954
44
  },
45
  {
46
+ "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.5089602470397949,
48
  "learning_rate": 1.371069182389937e-05,
49
+ "loss": 0.1182,
50
  "step": 1000
51
  }
52
  ],
 
55
  "num_input_tokens_seen": 0,
56
  "num_train_epochs": 10,
57
  "save_steps": 500,
58
+ "stateful_callbacks": {
59
+ "TrainerControl": {
60
+ "args": {
61
+ "should_epoch_stop": false,
62
+ "should_evaluate": false,
63
+ "should_log": false,
64
+ "should_save": true,
65
+ "should_training_stop": false
66
+ },
67
+ "attributes": {}
68
+ }
69
+ },
70
+ "total_flos": 379805351171904.0,
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
+ "alpha": 0.8038743315868176,
75
  "num_train_epochs": 10,
76
+ "temperature": 6
77
  }
78
  }
run-8/checkpoint-1500/config.json CHANGED
@@ -326,6 +326,6 @@
326
  "sinusoidal_pos_embds": false,
327
  "tie_weights_": true,
328
  "torch_dtype": "float32",
329
- "transformers_version": "4.37.2",
330
  "vocab_size": 30522
331
  }
 
326
  "sinusoidal_pos_embds": false,
327
  "tie_weights_": true,
328
  "torch_dtype": "float32",
329
+ "transformers_version": "4.41.1",
330
  "vocab_size": 30522
331
  }
run-8/checkpoint-1500/tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 512,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
run-8/checkpoint-1500/trainer_state.json CHANGED
@@ -10,56 +10,59 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6048387096774194,
14
- "eval_loss": 0.193294957280159,
15
- "eval_runtime": 1.3978,
16
- "eval_samples_per_second": 2217.705,
17
- "eval_steps_per_second": 46.5,
18
  "step": 318
19
  },
20
  {
21
- "epoch": 1.57,
 
22
  "learning_rate": 1.685534591194969e-05,
23
- "loss": 0.3109,
24
  "step": 500
25
  },
26
  {
27
  "epoch": 2.0,
28
- "eval_accuracy": 0.8212903225806452,
29
- "eval_loss": 0.09445521980524063,
30
- "eval_runtime": 1.3905,
31
- "eval_samples_per_second": 2229.493,
32
- "eval_steps_per_second": 46.747,
33
  "step": 636
34
  },
35
  {
36
  "epoch": 3.0,
37
- "eval_accuracy": 0.8806451612903226,
38
- "eval_loss": 0.061732806265354156,
39
- "eval_runtime": 1.4039,
40
- "eval_samples_per_second": 2208.176,
41
- "eval_steps_per_second": 46.3,
42
  "step": 954
43
  },
44
  {
45
- "epoch": 3.14,
 
46
  "learning_rate": 1.371069182389937e-05,
47
- "loss": 0.1076,
48
  "step": 1000
49
  },
50
  {
51
  "epoch": 4.0,
52
- "eval_accuracy": 0.9009677419354839,
53
- "eval_loss": 0.04701272398233414,
54
- "eval_runtime": 1.4228,
55
- "eval_samples_per_second": 2178.878,
56
- "eval_steps_per_second": 45.686,
57
  "step": 1272
58
  },
59
  {
60
- "epoch": 4.72,
 
61
  "learning_rate": 1.0566037735849058e-05,
62
- "loss": 0.0685,
63
  "step": 1500
64
  }
65
  ],
@@ -68,12 +71,24 @@
68
  "num_input_tokens_seen": 0,
69
  "num_train_epochs": 10,
70
  "save_steps": 500,
71
- "total_flos": 637476008310120.0,
 
 
 
 
 
 
 
 
 
 
 
 
72
  "train_batch_size": 48,
73
  "trial_name": null,
74
  "trial_params": {
75
- "alpha": 0.5027632612026453,
76
  "num_train_epochs": 10,
77
- "temperature": 16
78
  }
79
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6064516129032258,
14
+ "eval_loss": 0.2185761034488678,
15
+ "eval_runtime": 1.2573,
16
+ "eval_samples_per_second": 2465.677,
17
+ "eval_steps_per_second": 51.7,
18
  "step": 318
19
  },
20
  {
21
+ "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5516161918640137,
23
  "learning_rate": 1.685534591194969e-05,
24
+ "loss": 0.3519,
25
  "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
+ "eval_accuracy": 0.8412903225806452,
30
+ "eval_loss": 0.10163559764623642,
31
+ "eval_runtime": 1.2573,
32
+ "eval_samples_per_second": 2465.684,
33
+ "eval_steps_per_second": 51.7,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
+ "eval_accuracy": 0.8858064516129033,
39
+ "eval_loss": 0.06496834009885788,
40
+ "eval_runtime": 1.2624,
41
+ "eval_samples_per_second": 2455.593,
42
+ "eval_steps_per_second": 51.488,
43
  "step": 954
44
  },
45
  {
46
+ "epoch": 3.1446540880503147,
47
+ "grad_norm": 0.5089602470397949,
48
  "learning_rate": 1.371069182389937e-05,
49
+ "loss": 0.1182,
50
  "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
+ "eval_accuracy": 0.9003225806451612,
55
+ "eval_loss": 0.048449017107486725,
56
+ "eval_runtime": 1.2726,
57
+ "eval_samples_per_second": 2435.892,
58
+ "eval_steps_per_second": 51.075,
59
  "step": 1272
60
  },
61
  {
62
+ "epoch": 4.716981132075472,
63
+ "grad_norm": 0.3282025456428528,
64
  "learning_rate": 1.0566037735849058e-05,
65
+ "loss": 0.0719,
66
  "step": 1500
67
  }
68
  ],
 
71
  "num_input_tokens_seen": 0,
72
  "num_train_epochs": 10,
73
  "save_steps": 500,
74
+ "stateful_callbacks": {
75
+ "TrainerControl": {
76
+ "args": {
77
+ "should_epoch_stop": false,
78
+ "should_evaluate": false,
79
+ "should_log": false,
80
+ "should_save": true,
81
+ "should_training_stop": false
82
+ },
83
+ "attributes": {}
84
+ }
85
+ },
86
+ "total_flos": 510232955961624.0,
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
+ "alpha": 0.8038743315868176,
91
  "num_train_epochs": 10,
92
+ "temperature": 6
93
  }
94
  }
run-8/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b13871d74383d86435fd8abfc1e4e91aec6f513610429f304fa77dc682847ca0
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52acb0f10dbc4d7d9fc1bdc8234f938bf09a424cb2ae024de0d4b496ce371435
3
  size 268290900
run-8/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:079cfbdc10568e390a3e5da7c104fa6b167cafb7d9204a935437f3f49c4625be
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ede2108aa21755bea146deda4c9c419892c1cd9d89a6a7f459b3be1057346deb
3
  size 536643898
run-8/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e1264523e958cf7990dc5f42d876cc12129475c4603804cf66868aaf25c2c24
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04366f62f8f88f5a8265df59adb051b320463277845db80e7fa43f13110c18c9
3
  size 1064
run-8/checkpoint-500/trainer_state.json CHANGED
@@ -10,25 +10,25 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6590322580645162,
14
- "eval_loss": 0.4307226538658142,
15
- "eval_runtime": 1.3268,
16
- "eval_samples_per_second": 2336.53,
17
- "eval_steps_per_second": 48.992,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
- "grad_norm": 0.942814290523529,
23
- "learning_rate": 1.371069182389937e-05,
24
- "loss": 0.6582,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
- "max_steps": 1590,
30
  "num_input_tokens_seen": 0,
31
- "num_train_epochs": 5,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
@@ -46,8 +46,8 @@
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
- "alpha": 0.5348761545096068,
50
- "num_train_epochs": 5,
51
- "temperature": 2
52
  }
53
  }
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_accuracy": 0.6064516129032258,
14
+ "eval_loss": 0.2185761034488678,
15
+ "eval_runtime": 1.2573,
16
+ "eval_samples_per_second": 2465.677,
17
+ "eval_steps_per_second": 51.7,
18
  "step": 318
19
  },
20
  {
21
  "epoch": 1.5723270440251573,
22
+ "grad_norm": 0.5516161918640137,
23
+ "learning_rate": 1.685534591194969e-05,
24
+ "loss": 0.3519,
25
  "step": 500
26
  }
27
  ],
28
  "logging_steps": 500,
29
+ "max_steps": 3180,
30
  "num_input_tokens_seen": 0,
31
+ "num_train_epochs": 10,
32
  "save_steps": 500,
33
  "stateful_callbacks": {
34
  "TrainerControl": {
 
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
+ "alpha": 0.8038743315868176,
50
+ "num_train_epochs": 10,
51
+ "temperature": 6
52
  }
53
  }
run-8/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54f4065fa2b8e5cb22e349ab79a6f78b551068fff808fab27dd1319edd35db41
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f749d3ae69bc7e3b20b7cd1d8f394bea39a109520f3149e53f47330c0cf61cce
3
  size 5176
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:829aeb9a80fa5e890bae01359822ae021bf174b7265d13bb14d18d678935efbc
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba6ee41bf2e97af70bb5c7bef1f94ebd5979e03ab5954577cc7dff4f754b42f8
3
  size 5176