SjardiWillems committed on
Commit
97a95df
·
verified ·
1 Parent(s): 086ef30

Training in progress, epoch 1

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. model.safetensors +1 -1
  2. run-0/checkpoint-108/config.json +25 -0
  3. run-0/checkpoint-108/model.safetensors +3 -0
  4. run-0/checkpoint-108/optimizer.pt +3 -0
  5. run-0/checkpoint-108/rng_state.pth +3 -0
  6. run-0/checkpoint-108/scheduler.pt +3 -0
  7. run-0/checkpoint-108/special_tokens_map.json +7 -0
  8. run-0/checkpoint-108/tokenizer.json +0 -0
  9. run-0/checkpoint-108/tokenizer_config.json +55 -0
  10. run-0/checkpoint-108/trainer_state.json +62 -0
  11. run-0/checkpoint-108/training_args.bin +3 -0
  12. run-0/checkpoint-108/vocab.txt +0 -0
  13. run-0/checkpoint-27/config.json +25 -0
  14. run-0/checkpoint-27/model.safetensors +3 -0
  15. run-0/checkpoint-27/optimizer.pt +3 -0
  16. run-0/checkpoint-27/rng_state.pth +3 -0
  17. run-0/checkpoint-27/scheduler.pt +3 -0
  18. run-0/checkpoint-27/special_tokens_map.json +7 -0
  19. run-0/checkpoint-27/tokenizer.json +0 -0
  20. run-0/checkpoint-27/tokenizer_config.json +55 -0
  21. run-0/checkpoint-27/trainer_state.json +35 -0
  22. run-0/checkpoint-27/training_args.bin +3 -0
  23. run-0/checkpoint-27/vocab.txt +0 -0
  24. run-0/checkpoint-54/model.safetensors +1 -1
  25. run-0/checkpoint-54/optimizer.pt +1 -1
  26. run-0/checkpoint-54/rng_state.pth +1 -1
  27. run-0/checkpoint-54/scheduler.pt +1 -1
  28. run-0/checkpoint-54/trainer_state.json +23 -14
  29. run-0/checkpoint-54/training_args.bin +1 -1
  30. run-0/checkpoint-81/config.json +25 -0
  31. run-0/checkpoint-81/model.safetensors +3 -0
  32. run-0/checkpoint-81/optimizer.pt +3 -0
  33. run-0/checkpoint-81/rng_state.pth +3 -0
  34. run-0/checkpoint-81/scheduler.pt +3 -0
  35. run-0/checkpoint-81/special_tokens_map.json +7 -0
  36. run-0/checkpoint-81/tokenizer.json +0 -0
  37. run-0/checkpoint-81/tokenizer_config.json +55 -0
  38. run-0/checkpoint-81/trainer_state.json +53 -0
  39. run-0/checkpoint-81/training_args.bin +3 -0
  40. run-0/checkpoint-81/vocab.txt +0 -0
  41. run-1/checkpoint-14/config.json +25 -0
  42. run-1/checkpoint-14/model.safetensors +3 -0
  43. run-1/checkpoint-14/optimizer.pt +3 -0
  44. run-1/checkpoint-14/rng_state.pth +3 -0
  45. run-1/checkpoint-14/scheduler.pt +3 -0
  46. run-1/checkpoint-14/special_tokens_map.json +7 -0
  47. run-1/checkpoint-14/tokenizer.json +0 -0
  48. run-1/checkpoint-14/tokenizer_config.json +55 -0
  49. run-1/checkpoint-14/trainer_state.json +35 -0
  50. run-1/checkpoint-14/training_args.bin +3 -0
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:118b9e30d6ba753e129f144784e81c66c9f54dfe0e443e050a97f3aee09e6a89
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74155891d985b0d42ab73fff22f25b8d3ec62478beed1d5b251aca605465c701
3
  size 267832560
run-0/checkpoint-108/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
+ "vocab_size": 30522
25
+ }
run-0/checkpoint-108/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbc23ec6c9b99f7db9871e18c0c6c6b5a7f76de0099a138273dde8a71f02c418
3
+ size 267832560
run-0/checkpoint-108/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2813b65a0911926f37d8128a020a6d3edd9bf71c2e3e2664a98ca4d2271e0f5a
3
+ size 535727290
run-0/checkpoint-108/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02d344819f2e055d3295c3153ae912bf7e7a3a1b7486fccd5b5f6d042316f900
3
+ size 14244
run-0/checkpoint-108/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67956c369d07429b010040d83c80330b5c70f2c6ea13f345939bbacb8a220880
3
+ size 1064
run-0/checkpoint-108/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-108/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-108/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
run-0/checkpoint-108/trainer_state.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3508011277661142,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-81",
4
+ "epoch": 4.0,
5
+ "eval_steps": 500,
6
+ "global_step": 108,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_loss": 0.5963238477706909,
14
+ "eval_matthews_correlation": 0.0,
15
+ "eval_runtime": 0.4957,
16
+ "eval_samples_per_second": 2103.954,
17
+ "eval_steps_per_second": 133.136,
18
+ "step": 27
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_loss": 0.5681455731391907,
23
+ "eval_matthews_correlation": 0.2145337380780115,
24
+ "eval_runtime": 0.5534,
25
+ "eval_samples_per_second": 1884.845,
26
+ "eval_steps_per_second": 119.271,
27
+ "step": 54
28
+ },
29
+ {
30
+ "epoch": 3.0,
31
+ "eval_loss": 0.5657972693443298,
32
+ "eval_matthews_correlation": 0.3508011277661142,
33
+ "eval_runtime": 0.5816,
34
+ "eval_samples_per_second": 1793.217,
35
+ "eval_steps_per_second": 113.473,
36
+ "step": 81
37
+ },
38
+ {
39
+ "epoch": 4.0,
40
+ "eval_loss": 0.5970165133476257,
41
+ "eval_matthews_correlation": 0.3395845015956295,
42
+ "eval_runtime": 0.7089,
43
+ "eval_samples_per_second": 1471.251,
44
+ "eval_steps_per_second": 93.099,
45
+ "step": 108
46
+ }
47
+ ],
48
+ "logging_steps": 500,
49
+ "max_steps": 108,
50
+ "num_input_tokens_seen": 0,
51
+ "num_train_epochs": 4,
52
+ "save_steps": 500,
53
+ "total_flos": 0,
54
+ "train_batch_size": 32,
55
+ "trial_name": null,
56
+ "trial_params": {
57
+ "learning_rate": 3.092084833236513e-05,
58
+ "num_train_epochs": 4,
59
+ "per_device_train_batch_size": 32,
60
+ "seed": 5
61
+ }
62
+ }
run-0/checkpoint-108/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0528b625bd56c5f820c6a81109a84f0cb3b06c0ef021c43787878a2c5d2ba02
3
+ size 4984
run-0/checkpoint-108/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-27/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
+ "vocab_size": 30522
25
+ }
run-0/checkpoint-27/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef7da22ad4eab57d7f75f227a7e86180d679df13dd0f44ce401b24a90efb8d2b
3
+ size 267832560
run-0/checkpoint-27/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d289e6ab0d6a4cfb2126a250cb48079727e361c488c719ebbdfe227c47bdc1f
3
+ size 535727290
run-0/checkpoint-27/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cc9229173c7c9478c79be5e4d3be5b496d0478c042c4a00eef5442cda10e039
3
+ size 14244
run-0/checkpoint-27/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faed53a12ca49031f285ccdbb56fa11610bb4e971c2b33e5ef9375c9a2a7d448
3
+ size 1064
run-0/checkpoint-27/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-27/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-27/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
run-0/checkpoint-27/trainer_state.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.0,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-27",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 27,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_loss": 0.5963238477706909,
14
+ "eval_matthews_correlation": 0.0,
15
+ "eval_runtime": 0.4957,
16
+ "eval_samples_per_second": 2103.954,
17
+ "eval_steps_per_second": 133.136,
18
+ "step": 27
19
+ }
20
+ ],
21
+ "logging_steps": 500,
22
+ "max_steps": 108,
23
+ "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 4,
25
+ "save_steps": 500,
26
+ "total_flos": 0,
27
+ "train_batch_size": 32,
28
+ "trial_name": null,
29
+ "trial_params": {
30
+ "learning_rate": 3.092084833236513e-05,
31
+ "num_train_epochs": 4,
32
+ "per_device_train_batch_size": 32,
33
+ "seed": 5
34
+ }
35
+ }
run-0/checkpoint-27/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0528b625bd56c5f820c6a81109a84f0cb3b06c0ef021c43787878a2c5d2ba02
3
+ size 4984
run-0/checkpoint-27/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-54/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bdb29a274a9f4796cfc327e736fcf38327227a01cb03b7189ba50dbb219592b9
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4b6a63ce9d46e1ae3cde219ca81b1533b07205e9af068914457b198860a26d5
3
  size 267832560
run-0/checkpoint-54/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8e09f1a6c11df4f5e819d4994208eaea6a30d494e70cd0d49380f5f1a4c7f31
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0f74901e395bdea4b91798c80ae61a3c84da4be2eeebab193e6995ae7a567ba
3
  size 535727290
run-0/checkpoint-54/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44da968c6952a4010dfe7ab99b3b2a5e8ed126ffb3289a260aed361d65a6001c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ac6a3f739be45632e4af9f35c1d9c117846755b0a505b5a570d07cb5b90b58f
3
  size 14244
run-0/checkpoint-54/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebfa140991c390428dd17747db69853d779663c4aca0a3c1e089933ba28ed9eb
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcc93b69403b1613e45b57f6a0bcd68f331b2f63242419eb3b3da8f1b645a82c
3
  size 1064
run-0/checkpoint-54/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.0463559874942472,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-54",
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
  "global_step": 54,
7
  "is_hyper_param_search": true,
@@ -10,26 +10,35 @@
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
- "eval_loss": 0.5884751677513123,
14
- "eval_matthews_correlation": 0.0463559874942472,
15
- "eval_runtime": 0.4958,
16
- "eval_samples_per_second": 2103.484,
17
- "eval_steps_per_second": 133.106,
 
 
 
 
 
 
 
 
 
18
  "step": 54
19
  }
20
  ],
21
  "logging_steps": 500,
22
- "max_steps": 54,
23
  "num_input_tokens_seen": 0,
24
- "num_train_epochs": 1,
25
  "save_steps": 500,
26
  "total_flos": 0,
27
- "train_batch_size": 16,
28
  "trial_name": null,
29
  "trial_params": {
30
- "learning_rate": 6.512461119853454e-05,
31
- "num_train_epochs": 1,
32
- "per_device_train_batch_size": 16,
33
- "seed": 3
34
  }
35
  }
 
1
  {
2
+ "best_metric": 0.2145337380780115,
3
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-54",
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
  "global_step": 54,
7
  "is_hyper_param_search": true,
 
10
  "log_history": [
11
  {
12
  "epoch": 1.0,
13
+ "eval_loss": 0.5963238477706909,
14
+ "eval_matthews_correlation": 0.0,
15
+ "eval_runtime": 0.4957,
16
+ "eval_samples_per_second": 2103.954,
17
+ "eval_steps_per_second": 133.136,
18
+ "step": 27
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_loss": 0.5681455731391907,
23
+ "eval_matthews_correlation": 0.2145337380780115,
24
+ "eval_runtime": 0.5534,
25
+ "eval_samples_per_second": 1884.845,
26
+ "eval_steps_per_second": 119.271,
27
  "step": 54
28
  }
29
  ],
30
  "logging_steps": 500,
31
+ "max_steps": 108,
32
  "num_input_tokens_seen": 0,
33
+ "num_train_epochs": 4,
34
  "save_steps": 500,
35
  "total_flos": 0,
36
+ "train_batch_size": 32,
37
  "trial_name": null,
38
  "trial_params": {
39
+ "learning_rate": 3.092084833236513e-05,
40
+ "num_train_epochs": 4,
41
+ "per_device_train_batch_size": 32,
42
+ "seed": 5
43
  }
44
  }
run-0/checkpoint-54/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9cfb5803ec23c1c2dd9c46be94ed5fddaf79512fab886cf00c69777f781c10dd
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0528b625bd56c5f820c6a81109a84f0cb3b06c0ef021c43787878a2c5d2ba02
3
  size 4984
run-0/checkpoint-81/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
+ "vocab_size": 30522
25
+ }
run-0/checkpoint-81/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa281389f8903747b388fa28b77c7ab78085becf5516402c2cf418720384c017
3
+ size 267832560
run-0/checkpoint-81/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:798daf08ae165353999bf64897f5602447c4a3bc1339ad7deef64facf65e5dfc
3
+ size 535727290
run-0/checkpoint-81/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00ea43076e2a6a6f6fccc5e1bba1409298cbb028a45bcde1e1db4c9d96576bc5
3
+ size 14244
run-0/checkpoint-81/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d846dcb394c6918f7be65f8e45ab030343a71ddbde11836828150e64a603805a
3
+ size 1064
run-0/checkpoint-81/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-0/checkpoint-81/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-0/checkpoint-81/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
run-0/checkpoint-81/trainer_state.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3508011277661142,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-81",
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 81,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_loss": 0.5963238477706909,
14
+ "eval_matthews_correlation": 0.0,
15
+ "eval_runtime": 0.4957,
16
+ "eval_samples_per_second": 2103.954,
17
+ "eval_steps_per_second": 133.136,
18
+ "step": 27
19
+ },
20
+ {
21
+ "epoch": 2.0,
22
+ "eval_loss": 0.5681455731391907,
23
+ "eval_matthews_correlation": 0.2145337380780115,
24
+ "eval_runtime": 0.5534,
25
+ "eval_samples_per_second": 1884.845,
26
+ "eval_steps_per_second": 119.271,
27
+ "step": 54
28
+ },
29
+ {
30
+ "epoch": 3.0,
31
+ "eval_loss": 0.5657972693443298,
32
+ "eval_matthews_correlation": 0.3508011277661142,
33
+ "eval_runtime": 0.5816,
34
+ "eval_samples_per_second": 1793.217,
35
+ "eval_steps_per_second": 113.473,
36
+ "step": 81
37
+ }
38
+ ],
39
+ "logging_steps": 500,
40
+ "max_steps": 108,
41
+ "num_input_tokens_seen": 0,
42
+ "num_train_epochs": 4,
43
+ "save_steps": 500,
44
+ "total_flos": 0,
45
+ "train_batch_size": 32,
46
+ "trial_name": null,
47
+ "trial_params": {
48
+ "learning_rate": 3.092084833236513e-05,
49
+ "num_train_epochs": 4,
50
+ "per_device_train_batch_size": 32,
51
+ "seed": 5
52
+ }
53
+ }
run-0/checkpoint-81/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0528b625bd56c5f820c6a81109a84f0cb3b06c0ef021c43787878a2c5d2ba02
3
+ size 4984
run-0/checkpoint-81/vocab.txt ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-14/config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "distilbert-base-uncased",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.38.2",
24
+ "vocab_size": 30522
25
+ }
run-1/checkpoint-14/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74155891d985b0d42ab73fff22f25b8d3ec62478beed1d5b251aca605465c701
3
+ size 267832560
run-1/checkpoint-14/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b86309ebdef9cc2efb3682147508f45e81a889cc00200e9c99edf3cf8cc624a4
3
+ size 535727290
run-1/checkpoint-14/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:389cabed0d047b1be35be02b5952ab8e1860c987423c5e5cff25a6eb6f41b94c
3
+ size 14244
run-1/checkpoint-14/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f07ea2836ff7c1f70c312e54c4927fac05939742a8500a18c3733797c5a086c
3
+ size 1064
run-1/checkpoint-14/special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
run-1/checkpoint-14/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
run-1/checkpoint-14/tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "100": {
12
+ "content": "[UNK]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "101": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "102": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "103": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "do_lower_case": true,
47
+ "mask_token": "[MASK]",
48
+ "model_max_length": 512,
49
+ "pad_token": "[PAD]",
50
+ "sep_token": "[SEP]",
51
+ "strip_accents": null,
52
+ "tokenize_chinese_chars": true,
53
+ "tokenizer_class": "DistilBertTokenizer",
54
+ "unk_token": "[UNK]"
55
+ }
run-1/checkpoint-14/trainer_state.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": -0.004094348593533731,
3
+ "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-1/checkpoint-14",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 14,
7
+ "is_hyper_param_search": true,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "eval_loss": 0.6712570190429688,
14
+ "eval_matthews_correlation": -0.004094348593533731,
15
+ "eval_runtime": 0.4692,
16
+ "eval_samples_per_second": 2222.921,
17
+ "eval_steps_per_second": 140.664,
18
+ "step": 14
19
+ }
20
+ ],
21
+ "logging_steps": 500,
22
+ "max_steps": 28,
23
+ "num_input_tokens_seen": 0,
24
+ "num_train_epochs": 2,
25
+ "save_steps": 500,
26
+ "total_flos": 0,
27
+ "train_batch_size": 64,
28
+ "trial_name": null,
29
+ "trial_params": {
30
+ "learning_rate": 3.482861293435522e-06,
31
+ "num_train_epochs": 2,
32
+ "per_device_train_batch_size": 64,
33
+ "seed": 22
34
+ }
35
+ }
run-1/checkpoint-14/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df0f639343d75cb266d6d449e470a6f2e2f119cf3158ecf6b8d9125888beaf82
3
+ size 4984