yemen2016 committed
Commit 6b2039a (verified) · 1 parent: 17d5c01

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete list.
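The commit message above says the folder was pushed with `huggingface_hub`. For reference, a minimal sketch of how such an upload is typically performed with that client (the local path and repo id here are placeholders, not taken from this commit):

```python
from huggingface_hub import HfApi

api = HfApi()
# Push every file under the local training output directory to the Hub.
# Large binaries (*.bin, *.pt, *.pth) are stored through Git LFS, which is
# why the diffs below show LFS pointer files rather than raw weights.
api.upload_folder(
    folder_path="./output",           # hypothetical local output directory
    repo_id="yemen2016/<repo-name>",  # placeholder repo id
    repo_type="model",
)
```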
Files changed (50)
checkpoint-1000/config.json +31 -0
checkpoint-1000/generation_config.json +7 -0
checkpoint-1000/optimizer.pt +3 -0
checkpoint-1000/pytorch_model.bin +3 -0
checkpoint-1000/rng_state.pth +3 -0
checkpoint-1000/scheduler.pt +3 -0
checkpoint-1000/trainer_state.json +28 -0
checkpoint-1000/training_args.bin +3 -0
checkpoint-1500/config.json +31 -0
checkpoint-1500/generation_config.json +7 -0
checkpoint-1500/optimizer.pt +3 -0
checkpoint-1500/pytorch_model.bin +3 -0
checkpoint-1500/rng_state.pth +3 -0
checkpoint-1500/scheduler.pt +3 -0
checkpoint-1500/trainer_state.json +34 -0
checkpoint-1500/training_args.bin +3 -0
checkpoint-2000/config.json +31 -0
checkpoint-2000/generation_config.json +7 -0
checkpoint-2000/optimizer.pt +3 -0
checkpoint-2000/pytorch_model.bin +3 -0
checkpoint-2000/rng_state.pth +3 -0
checkpoint-2000/scheduler.pt +3 -0
checkpoint-2000/trainer_state.json +48 -0
checkpoint-2000/training_args.bin +3 -0
checkpoint-2500/config.json +31 -0
checkpoint-2500/generation_config.json +7 -0
checkpoint-2500/optimizer.pt +3 -0
checkpoint-2500/pytorch_model.bin +3 -0
checkpoint-2500/rng_state.pth +3 -0
checkpoint-2500/scheduler.pt +3 -0
checkpoint-2500/trainer_state.json +54 -0
checkpoint-2500/training_args.bin +3 -0
checkpoint-3000/config.json +31 -0
checkpoint-3000/generation_config.json +7 -0
checkpoint-3000/optimizer.pt +3 -0
checkpoint-3000/pytorch_model.bin +3 -0
checkpoint-3000/rng_state.pth +3 -0
checkpoint-3000/scheduler.pt +3 -0
checkpoint-3000/trainer_state.json +60 -0
checkpoint-3000/training_args.bin +3 -0
checkpoint-3500/config.json +31 -0
checkpoint-3500/generation_config.json +7 -0
checkpoint-3500/optimizer.pt +3 -0
checkpoint-3500/pytorch_model.bin +3 -0
checkpoint-3500/rng_state.pth +3 -0
checkpoint-3500/scheduler.pt +3 -0
checkpoint-3500/trainer_state.json +66 -0
checkpoint-3500/training_args.bin +3 -0
checkpoint-4000/config.json +31 -0
checkpoint-4000/generation_config.json +7 -0
checkpoint-1000/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "google/mt5-small",
+   "architectures": [
+     "T5ForConditionalGeneration"
+   ],
+   "d_ff": 1024,
+   "d_kv": 64,
+   "d_model": 512,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "num_decoder_layers": 8,
+   "num_heads": 6,
+   "num_layers": 8,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "tokenizer_class": "T5Tokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.30.2",
+   "use_cache": true,
+   "vocab_size": 250112
+ }
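The config above is the stock `google/mt5-small` architecture (8 encoder and 8 decoder layers, d_model 512, 250k-token vocabulary). A minimal sketch of inspecting and loading one of these checkpoint folders with `transformers`, assuming the folder has been downloaded locally:

```python
from transformers import AutoConfig, T5ForConditionalGeneration

# Read the architecture hyperparameters straight from the checkpoint's config.json.
config = AutoConfig.from_pretrained("checkpoint-1000")
print(config.d_model, config.num_layers, config.vocab_size)  # 512 8 250112

# pytorch_model.bin in the same folder holds the fine-tuned weights.
model = T5ForConditionalGeneration.from_pretrained("checkpoint-1000")
```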
checkpoint-1000/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "decoder_start_token_id": 0,
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.30.2"
+ }
checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7f45ef9f300805bd7bd9ee9e17f013ccb8f99f7c3665f816aedbe9deb47a2155
+ size 2401526789
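optimizer.pt and the other large binaries in this commit are Git LFS pointer files: a version line, the sha256 of the real payload, and its size in bytes (about 2.4 GB here for the optimizer state). A small sketch for verifying a downloaded payload against the pointer's oid (the local path is an assumption):

```python
import hashlib

def lfs_sha256(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream a large file and return its hex sha256 digest."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        while block := f.read(chunk_size):
            h.update(block)
    return h.hexdigest()

# Should match the oid recorded in the pointer above.
assert lfs_sha256("checkpoint-1000/optimizer.pt") == (
    "7f45ef9f300805bd7bd9ee9e17f013ccb8f99f7c3665f816aedbe9deb47a2155"
)
```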
checkpoint-1000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2fa5a2713f888c3050cc4aca3945cec891383f054793e5057b48e5c9c22e04ec
+ size 1200772613
checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:88039c0b1cb828fa39bf437a0b9f69a2015fd2ae4e64d267caf30cd11ef7355f
+ size 14575
checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6cc11f0172d0c64c7c7a2ba46923922c59a4726fcb5d63f23d8426a3b6d8b3c0
+ size 627
checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,28 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.5293806246691372,
+   "global_step": 1000,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.26,
+       "learning_rate": 1.8235397917769543e-05,
+       "loss": 21.3637,
+       "step": 500
+     },
+     {
+       "epoch": 0.53,
+       "learning_rate": 1.6470795835539088e-05,
+       "loss": 10.4706,
+       "step": 1000
+     }
+   ],
+   "max_steps": 5667,
+   "num_train_epochs": 3,
+   "total_flos": 1057499381760000.0,
+   "trial_name": null,
+   "trial_params": null
+ }
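The logged learning rates are consistent with the Trainer's default linear decay, here from a base rate of 2e-05 down to 0 over max_steps = 5667 with no warmup (an inference from the numbers, not stated anywhere in the commit): 2e-05 * (5667 - 500) / 5667 ≈ 1.8235e-05, exactly the step-500 entry. A quick check:

```python
# Inferred schedule: linear decay from 2e-05 over 5667 steps (assumption).
base_lr, max_steps = 2e-05, 5667

def linear_lr(step: int) -> float:
    return base_lr * (max_steps - step) / max_steps

print(linear_lr(500))   # 1.8235397917769543e-05 -> matches the step-500 log
print(linear_lr(1000))  # 1.6470795835539088e-05 -> matches the step-1000 log
```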
checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8
+ size 3963
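Together with optimizer.pt, scheduler.pt, and rng_state.pth, the pickled training_args.bin makes each folder a complete resumable checkpoint. A hedged sketch of resuming, assuming a `trainer` (a `transformers.Trainer`) has been rebuilt with the same model, data, and arguments as the original run, none of which are shown in this commit:

```python
# `trainer` is a transformers.Trainer reconstructed to match the original run.
# Optimizer, scheduler, and RNG states are restored from the checkpoint folder.
trainer.train(resume_from_checkpoint="checkpoint-1000")
```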
checkpoint-1500/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "google/mt5-small",
+   "architectures": [
+     "T5ForConditionalGeneration"
+   ],
+   "d_ff": 1024,
+   "d_kv": 64,
+   "d_model": 512,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "num_decoder_layers": 8,
+   "num_heads": 6,
+   "num_layers": 8,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "tokenizer_class": "T5Tokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.30.2",
+   "use_cache": true,
+   "vocab_size": 250112
+ }
checkpoint-1500/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "decoder_start_token_id": 0,
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.30.2"
+ }
checkpoint-1500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5b81bd9ed06bf94687ceddd71e476c468ba70649a73cfed189683f2e0b1696e0
+ size 2401526789
checkpoint-1500/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c47aca9b57c3fb40c7799f9621e4031aafaa23912fad5faa211c67b23ba79265
+ size 1200772613
checkpoint-1500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:482b44b7dc29a71d483b84565c27cbb93c6be68f9399e9d8540e32f90bd13b05
+ size 14575
checkpoint-1500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dd61bf3f22965f9d5c7a2e2b71d89f18e486198ff404d4f69e3ab1b9b5ceb9cd
+ size 627
checkpoint-1500/trainer_state.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 0.7940709370037057,
+   "global_step": 1500,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.26,
+       "learning_rate": 1.8235397917769543e-05,
+       "loss": 21.3637,
+       "step": 500
+     },
+     {
+       "epoch": 0.53,
+       "learning_rate": 1.6470795835539088e-05,
+       "loss": 10.4706,
+       "step": 1000
+     },
+     {
+       "epoch": 0.79,
+       "learning_rate": 1.470619375330863e-05,
+       "loss": 5.7273,
+       "step": 1500
+     }
+   ],
+   "max_steps": 5667,
+   "num_train_epochs": 3,
+   "total_flos": 1586249072640000.0,
+   "trial_name": null,
+   "trial_params": null
+ }
checkpoint-1500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8
+ size 3963
checkpoint-2000/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "google/mt5-small",
+   "architectures": [
+     "T5ForConditionalGeneration"
+   ],
+   "d_ff": 1024,
+   "d_kv": 64,
+   "d_model": 512,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "num_decoder_layers": 8,
+   "num_heads": 6,
+   "num_layers": 8,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "tokenizer_class": "T5Tokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.30.2",
+   "use_cache": true,
+   "vocab_size": 250112
+ }
checkpoint-2000/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "decoder_start_token_id": 0,
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.30.2"
+ }
checkpoint-2000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bf1d9818130cd02c940d5d8336f5d1284dc7a6ecddc52fdfe95895208210c6fd
+ size 2401526789
checkpoint-2000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:644a5f90ce26df7b30484012cadcf8fb54f7db436b6fa4e0c4f8e573906cc82a
+ size 1200772613
checkpoint-2000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0807229f3c019e66583ec287e344a67996c749da32981aa1c3321e1e4823e0bf
+ size 14575
checkpoint-2000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0a1a0e4eab8ce503746f69b24db3f656d4a4fe1ecfa7fbaef5246142f9d9eaa3
+ size 627
checkpoint-2000/trainer_state.json ADDED
@@ -0,0 +1,48 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.0587612493382743,
+   "global_step": 2000,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.26,
+       "learning_rate": 1.8235397917769543e-05,
+       "loss": 21.3637,
+       "step": 500
+     },
+     {
+       "epoch": 0.53,
+       "learning_rate": 1.6470795835539088e-05,
+       "loss": 10.4706,
+       "step": 1000
+     },
+     {
+       "epoch": 0.79,
+       "learning_rate": 1.470619375330863e-05,
+       "loss": 5.7273,
+       "step": 1500
+     },
+     {
+       "epoch": 1.0,
+       "eval_loss": 1.1415340900421143,
+       "eval_runtime": 9.212,
+       "eval_samples_per_second": 182.262,
+       "eval_steps_per_second": 11.398,
+       "step": 1889
+     },
+     {
+       "epoch": 1.06,
+       "learning_rate": 1.2941591671078173e-05,
+       "loss": 3.094,
+       "step": 2000
+     }
+   ],
+   "max_steps": 5667,
+   "num_train_epochs": 3,
+   "total_flos": 2114205638983680.0,
+   "trial_name": null,
+   "trial_params": null
+ }
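From this checkpoint on, `log_history` also carries an evaluation record, distinguishable by its `eval_*` keys. Its step, 1889, is the end of epoch 1: 5667 max steps over 3 epochs is 1889 steps per epoch. A short sketch separating the two record types (local path assumed):

```python
import json

with open("checkpoint-2000/trainer_state.json") as f:
    state = json.load(f)

train_logs = [e for e in state["log_history"] if "loss" in e]       # step-wise training loss
eval_logs  = [e for e in state["log_history"] if "eval_loss" in e]  # end-of-epoch evaluation

# 5667 / 3 = 1889, so the eval record at step 1889 closes epoch 1.
print(eval_logs[0]["step"], eval_logs[0]["eval_loss"])  # 1889 1.1415340900421143
```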
checkpoint-2000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8
+ size 3963
checkpoint-2500/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "google/mt5-small",
+   "architectures": [
+     "T5ForConditionalGeneration"
+   ],
+   "d_ff": 1024,
+   "d_kv": 64,
+   "d_model": 512,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "num_decoder_layers": 8,
+   "num_heads": 6,
+   "num_layers": 8,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "tokenizer_class": "T5Tokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.30.2",
+   "use_cache": true,
+   "vocab_size": 250112
+ }
checkpoint-2500/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "decoder_start_token_id": 0,
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.30.2"
+ }
checkpoint-2500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c6ab769569a0f6297712e0d1132bc34a6f9499fd66c17d6d9753b8e4b01dca50
+ size 2401526789
checkpoint-2500/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7c9470a076634df44c2321615cf07e9d368d61a7245772804eedc85826a270ee
+ size 1200772613
checkpoint-2500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c12a9660982f34892a3560892dee7f01db80737f783c60343844469e4fba2e8
+ size 14575
checkpoint-2500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ebf6ede8433b1773428ee47937b6ffe5c3543130f4a7ab751a257c6e8e3052d3
+ size 627
checkpoint-2500/trainer_state.json ADDED
@@ -0,0 +1,54 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.3234515616728428,
+   "global_step": 2500,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.26,
+       "learning_rate": 1.8235397917769543e-05,
+       "loss": 21.3637,
+       "step": 500
+     },
+     {
+       "epoch": 0.53,
+       "learning_rate": 1.6470795835539088e-05,
+       "loss": 10.4706,
+       "step": 1000
+     },
+     {
+       "epoch": 0.79,
+       "learning_rate": 1.470619375330863e-05,
+       "loss": 5.7273,
+       "step": 1500
+     },
+     {
+       "epoch": 1.0,
+       "eval_loss": 1.1415340900421143,
+       "eval_runtime": 9.212,
+       "eval_samples_per_second": 182.262,
+       "eval_steps_per_second": 11.398,
+       "step": 1889
+     },
+     {
+       "epoch": 1.06,
+       "learning_rate": 1.2941591671078173e-05,
+       "loss": 3.094,
+       "step": 2000
+     },
+     {
+       "epoch": 1.32,
+       "learning_rate": 1.1176989588847715e-05,
+       "loss": 1.946,
+       "step": 2500
+     }
+   ],
+   "max_steps": 5667,
+   "num_train_epochs": 3,
+   "total_flos": 2642955329863680.0,
+   "trial_name": null,
+   "trial_params": null
+ }
checkpoint-2500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8
+ size 3963
checkpoint-3000/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "google/mt5-small",
+   "architectures": [
+     "T5ForConditionalGeneration"
+   ],
+   "d_ff": 1024,
+   "d_kv": 64,
+   "d_model": 512,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "num_decoder_layers": 8,
+   "num_heads": 6,
+   "num_layers": 8,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "tokenizer_class": "T5Tokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.30.2",
+   "use_cache": true,
+   "vocab_size": 250112
+ }
checkpoint-3000/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "decoder_start_token_id": 0,
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.30.2"
+ }
checkpoint-3000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b440251557d010731a799b2b1054ac12ac9ef3baba445d0342bcc95cc7a3a3d9
+ size 2401526789
checkpoint-3000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:33ed3c39ec5a5bfc19af733c0442c5109470f6e4b8595cbb0b09fb2cea9d79af
+ size 1200772613
checkpoint-3000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:939ebb481cc3c107a086817849ed472b19c0ffe17e14943d08ee61ed9339729b
+ size 14575
checkpoint-3000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:328d3e4f068dad4335ced61e817465ec2336fbe70ef3923d7474e80b9e56c3f2
+ size 627
checkpoint-3000/trainer_state.json ADDED
@@ -0,0 +1,60 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.5881418740074114,
+   "global_step": 3000,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.26,
+       "learning_rate": 1.8235397917769543e-05,
+       "loss": 21.3637,
+       "step": 500
+     },
+     {
+       "epoch": 0.53,
+       "learning_rate": 1.6470795835539088e-05,
+       "loss": 10.4706,
+       "step": 1000
+     },
+     {
+       "epoch": 0.79,
+       "learning_rate": 1.470619375330863e-05,
+       "loss": 5.7273,
+       "step": 1500
+     },
+     {
+       "epoch": 1.0,
+       "eval_loss": 1.1415340900421143,
+       "eval_runtime": 9.212,
+       "eval_samples_per_second": 182.262,
+       "eval_steps_per_second": 11.398,
+       "step": 1889
+     },
+     {
+       "epoch": 1.06,
+       "learning_rate": 1.2941591671078173e-05,
+       "loss": 3.094,
+       "step": 2000
+     },
+     {
+       "epoch": 1.32,
+       "learning_rate": 1.1176989588847715e-05,
+       "loss": 1.946,
+       "step": 2500
+     },
+     {
+       "epoch": 1.59,
+       "learning_rate": 9.41238750661726e-06,
+       "loss": 1.3926,
+       "step": 3000
+     }
+   ],
+   "max_steps": 5667,
+   "num_train_epochs": 3,
+   "total_flos": 3171705020743680.0,
+   "trial_name": null,
+   "trial_params": null
+ }
checkpoint-3000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8
+ size 3963
checkpoint-3500/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "google/mt5-small",
+   "architectures": [
+     "T5ForConditionalGeneration"
+   ],
+   "d_ff": 1024,
+   "d_kv": 64,
+   "d_model": 512,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "num_decoder_layers": 8,
+   "num_heads": 6,
+   "num_layers": 8,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "tokenizer_class": "T5Tokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.30.2",
+   "use_cache": true,
+   "vocab_size": 250112
+ }
checkpoint-3500/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "decoder_start_token_id": 0,
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.30.2"
+ }
checkpoint-3500/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5c333accc8e0fe0febadaec30a8be8bc1e7224019f54ca7a8c32a81bd6d46679
+ size 2401526789
checkpoint-3500/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:14904e15689dd20acb1fcddb7cff99524cd14e848a15f4c7f80c7a0eaece250c
+ size 1200772613
checkpoint-3500/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b2df463cabe1c61d85e4b8c2b433630ac70d399fc7a7afb113dbd157c56e8342
+ size 14575
checkpoint-3500/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:529a4061e9ddf3e0cf9f741b46526bf6994e2540c2b2efdf50582f3498305bdf
+ size 627
checkpoint-3500/trainer_state.json ADDED
@@ -0,0 +1,66 @@
+ {
+   "best_metric": null,
+   "best_model_checkpoint": null,
+   "epoch": 1.8528321863419799,
+   "global_step": 3500,
+   "is_hyper_param_search": false,
+   "is_local_process_zero": true,
+   "is_world_process_zero": true,
+   "log_history": [
+     {
+       "epoch": 0.26,
+       "learning_rate": 1.8235397917769543e-05,
+       "loss": 21.3637,
+       "step": 500
+     },
+     {
+       "epoch": 0.53,
+       "learning_rate": 1.6470795835539088e-05,
+       "loss": 10.4706,
+       "step": 1000
+     },
+     {
+       "epoch": 0.79,
+       "learning_rate": 1.470619375330863e-05,
+       "loss": 5.7273,
+       "step": 1500
+     },
+     {
+       "epoch": 1.0,
+       "eval_loss": 1.1415340900421143,
+       "eval_runtime": 9.212,
+       "eval_samples_per_second": 182.262,
+       "eval_steps_per_second": 11.398,
+       "step": 1889
+     },
+     {
+       "epoch": 1.06,
+       "learning_rate": 1.2941591671078173e-05,
+       "loss": 3.094,
+       "step": 2000
+     },
+     {
+       "epoch": 1.32,
+       "learning_rate": 1.1176989588847715e-05,
+       "loss": 1.946,
+       "step": 2500
+     },
+     {
+       "epoch": 1.59,
+       "learning_rate": 9.41238750661726e-06,
+       "loss": 1.3926,
+       "step": 3000
+     },
+     {
+       "epoch": 1.85,
+       "learning_rate": 7.6477854243868e-06,
+       "loss": 1.0576,
+       "step": 3500
+     }
+   ],
+   "max_steps": 5667,
+   "num_train_epochs": 3,
+   "total_flos": 3700454711623680.0,
+   "trial_name": null,
+   "trial_params": null
+ }
checkpoint-3500/training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8
+ size 3963
checkpoint-4000/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_name_or_path": "google/mt5-small",
+   "architectures": [
+     "T5ForConditionalGeneration"
+   ],
+   "d_ff": 1024,
+   "d_kv": 64,
+   "d_model": 512,
+   "decoder_start_token_id": 0,
+   "dense_act_fn": "gelu_new",
+   "dropout_rate": 0.1,
+   "eos_token_id": 1,
+   "feed_forward_proj": "gated-gelu",
+   "initializer_factor": 1.0,
+   "is_encoder_decoder": true,
+   "is_gated_act": true,
+   "layer_norm_epsilon": 1e-06,
+   "model_type": "t5",
+   "num_decoder_layers": 8,
+   "num_heads": 6,
+   "num_layers": 8,
+   "pad_token_id": 0,
+   "relative_attention_max_distance": 128,
+   "relative_attention_num_buckets": 32,
+   "tie_word_embeddings": false,
+   "tokenizer_class": "T5Tokenizer",
+   "torch_dtype": "float32",
+   "transformers_version": "4.30.2",
+   "use_cache": true,
+   "vocab_size": 250112
+ }
checkpoint-4000/generation_config.json ADDED
@@ -0,0 +1,7 @@
+ {
+   "_from_model_config": true,
+   "decoder_start_token_id": 0,
+   "eos_token_id": 1,
+   "pad_token_id": 0,
+   "transformers_version": "4.30.2"
+ }