diff --git a/checkpoint-1000/config.json b/checkpoint-1000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd0ff4f9af6140e8d2f25f44915b34dc6d26f42 --- /dev/null +++ b/checkpoint-1000/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "T5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/checkpoint-1000/generation_config.json b/checkpoint-1000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..144e9968e1ce77001f065b1e01ce7d39571e3526 --- /dev/null +++ b/checkpoint-1000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.30.2" +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..248e4b951a7efeb51e408ad6dbc73fdc4a503839 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f45ef9f300805bd7bd9ee9e17f013ccb8f99f7c3665f816aedbe9deb47a2155 +size 2401526789 diff --git a/checkpoint-1000/pytorch_model.bin b/checkpoint-1000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..28977c11e2437f434c9aa495af747432f9254a7d --- /dev/null +++ b/checkpoint-1000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fa5a2713f888c3050cc4aca3945cec891383f054793e5057b48e5c9c22e04ec +size 1200772613 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..c68a1ab55095b23d45d807bf36d5da10d799bf1b --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88039c0b1cb828fa39bf437a0b9f69a2015fd2ae4e64d267caf30cd11ef7355f +size 14575 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..0a9379c34d12f1a21398eb8674f928e4c3f43d05 --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cc11f0172d0c64c7c7a2ba46923922c59a4726fcb5d63f23d8426a3b6d8b3c0 +size 627 diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..60dadbbe06d50bb524c97dfa65a13b2e03dc3a54 --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,28 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5293806246691372, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.26, + "learning_rate": 1.8235397917769543e-05, + "loss": 21.3637, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 1.6470795835539088e-05, + "loss": 10.4706, + "step": 1000 + } + ], + "max_steps": 5667, + "num_train_epochs": 3, + "total_flos": 1057499381760000.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..edda9a4eabe29732a97cbf92cb19dfff789a056c --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8 +size 3963 diff --git a/checkpoint-1500/config.json b/checkpoint-1500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd0ff4f9af6140e8d2f25f44915b34dc6d26f42 --- /dev/null +++ b/checkpoint-1500/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "T5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/checkpoint-1500/generation_config.json b/checkpoint-1500/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..144e9968e1ce77001f065b1e01ce7d39571e3526 --- /dev/null +++ b/checkpoint-1500/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.30.2" +} diff --git a/checkpoint-1500/optimizer.pt b/checkpoint-1500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..73518c400c368af4745cf18e98075567d7779721 --- /dev/null +++ b/checkpoint-1500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b81bd9ed06bf94687ceddd71e476c468ba70649a73cfed189683f2e0b1696e0 +size 2401526789 diff --git a/checkpoint-1500/pytorch_model.bin b/checkpoint-1500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..810472d36159f5949f1b50ce16b5969bbb261894 --- /dev/null +++ b/checkpoint-1500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c47aca9b57c3fb40c7799f9621e4031aafaa23912fad5faa211c67b23ba79265 +size 1200772613 diff --git a/checkpoint-1500/rng_state.pth b/checkpoint-1500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a8580c9b643101e9cd44fb4db21d9a8911f11078 --- /dev/null +++ b/checkpoint-1500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:482b44b7dc29a71d483b84565c27cbb93c6be68f9399e9d8540e32f90bd13b05 +size 14575 diff --git a/checkpoint-1500/scheduler.pt b/checkpoint-1500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..24785db2da2adb538913eae56bea0de5b486a9a3 --- /dev/null +++ b/checkpoint-1500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd61bf3f22965f9d5c7a2e2b71d89f18e486198ff404d4f69e3ab1b9b5ceb9cd +size 627 diff --git a/checkpoint-1500/trainer_state.json b/checkpoint-1500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4d383229ecb10853bb129cb61e8d2a92abc85ae7 --- /dev/null +++ b/checkpoint-1500/trainer_state.json @@ -0,0 +1,34 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7940709370037057, + "global_step": 1500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.26, + "learning_rate": 1.8235397917769543e-05, + "loss": 21.3637, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 1.6470795835539088e-05, + "loss": 10.4706, + "step": 1000 + }, + { + "epoch": 0.79, + "learning_rate": 1.470619375330863e-05, + "loss": 5.7273, + "step": 1500 + } + ], + "max_steps": 5667, + "num_train_epochs": 3, + "total_flos": 1586249072640000.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1500/training_args.bin b/checkpoint-1500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..edda9a4eabe29732a97cbf92cb19dfff789a056c --- /dev/null +++ b/checkpoint-1500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8 +size 3963 diff --git a/checkpoint-2000/config.json b/checkpoint-2000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd0ff4f9af6140e8d2f25f44915b34dc6d26f42 --- /dev/null +++ b/checkpoint-2000/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "T5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/checkpoint-2000/generation_config.json b/checkpoint-2000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..144e9968e1ce77001f065b1e01ce7d39571e3526 --- /dev/null +++ b/checkpoint-2000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.30.2" +} diff --git a/checkpoint-2000/optimizer.pt b/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..bc157f75ac96d90ff95a592f82c4e2cecce0a7ca --- /dev/null +++ b/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf1d9818130cd02c940d5d8336f5d1284dc7a6ecddc52fdfe95895208210c6fd +size 2401526789 diff --git a/checkpoint-2000/pytorch_model.bin b/checkpoint-2000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..fa979f91b95283a259611303bd3ecdb5520e1a85 --- /dev/null +++ b/checkpoint-2000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:644a5f90ce26df7b30484012cadcf8fb54f7db436b6fa4e0c4f8e573906cc82a +size 1200772613 diff --git a/checkpoint-2000/rng_state.pth b/checkpoint-2000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..a6624479027a84e5f3544a318c64a4e5223ec675 --- /dev/null +++ b/checkpoint-2000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0807229f3c019e66583ec287e344a67996c749da32981aa1c3321e1e4823e0bf +size 14575 diff --git a/checkpoint-2000/scheduler.pt b/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ede22817c62099f3ad015efc2ca1285d1776370f --- /dev/null +++ b/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a1a0e4eab8ce503746f69b24db3f656d4a4fe1ecfa7fbaef5246142f9d9eaa3 +size 627 diff --git a/checkpoint-2000/trainer_state.json b/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ad3d4feb96ba79ccd96a68fa54f5172c5cc74bc3 --- /dev/null +++ b/checkpoint-2000/trainer_state.json @@ -0,0 +1,48 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0587612493382743, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.26, + "learning_rate": 1.8235397917769543e-05, + "loss": 21.3637, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 1.6470795835539088e-05, + "loss": 10.4706, + "step": 1000 + }, + { + "epoch": 0.79, + "learning_rate": 1.470619375330863e-05, + "loss": 5.7273, + "step": 1500 + }, + { + "epoch": 1.0, + "eval_loss": 1.1415340900421143, + "eval_runtime": 9.212, + "eval_samples_per_second": 182.262, + "eval_steps_per_second": 11.398, + "step": 1889 + }, + { + "epoch": 1.06, + "learning_rate": 1.2941591671078173e-05, + "loss": 3.094, + "step": 2000 + } + ], + "max_steps": 5667, + "num_train_epochs": 3, + "total_flos": 2114205638983680.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2000/training_args.bin b/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..edda9a4eabe29732a97cbf92cb19dfff789a056c --- /dev/null +++ b/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8 +size 3963 diff --git a/checkpoint-2500/config.json b/checkpoint-2500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd0ff4f9af6140e8d2f25f44915b34dc6d26f42 --- /dev/null +++ b/checkpoint-2500/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "T5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/checkpoint-2500/generation_config.json b/checkpoint-2500/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..144e9968e1ce77001f065b1e01ce7d39571e3526 --- /dev/null +++ b/checkpoint-2500/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.30.2" +} diff --git a/checkpoint-2500/optimizer.pt b/checkpoint-2500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0258dc3f41279908a14511c92fa526b5fcd80e1a --- /dev/null +++ b/checkpoint-2500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c6ab769569a0f6297712e0d1132bc34a6f9499fd66c17d6d9753b8e4b01dca50 +size 2401526789 diff --git a/checkpoint-2500/pytorch_model.bin b/checkpoint-2500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8db776b9918450dc56e702a9a1efbf64566d010e --- /dev/null +++ b/checkpoint-2500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c9470a076634df44c2321615cf07e9d368d61a7245772804eedc85826a270ee +size 1200772613 diff --git a/checkpoint-2500/rng_state.pth b/checkpoint-2500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..53736a2cb25bfafc0d4109787df0e00ae24fcdc2 --- /dev/null +++ b/checkpoint-2500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c12a9660982f34892a3560892dee7f01db80737f783c60343844469e4fba2e8 +size 14575 diff --git a/checkpoint-2500/scheduler.pt b/checkpoint-2500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..51966237ed1b74295011491884c113b2dcb3663f --- /dev/null +++ b/checkpoint-2500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebf6ede8433b1773428ee47937b6ffe5c3543130f4a7ab751a257c6e8e3052d3 +size 627 diff --git a/checkpoint-2500/trainer_state.json b/checkpoint-2500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..569359fafeb6f8b6c27c079af27d677a905103cf --- /dev/null +++ b/checkpoint-2500/trainer_state.json @@ -0,0 +1,54 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3234515616728428, + "global_step": 2500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.26, + "learning_rate": 1.8235397917769543e-05, + "loss": 21.3637, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 1.6470795835539088e-05, + "loss": 10.4706, + "step": 1000 + }, + { + "epoch": 0.79, + "learning_rate": 1.470619375330863e-05, + "loss": 5.7273, + "step": 1500 + }, + { + "epoch": 1.0, + "eval_loss": 1.1415340900421143, + "eval_runtime": 9.212, + "eval_samples_per_second": 182.262, + "eval_steps_per_second": 11.398, + "step": 1889 + }, + { + "epoch": 1.06, + "learning_rate": 1.2941591671078173e-05, + "loss": 3.094, + "step": 2000 + }, + { + "epoch": 1.32, + "learning_rate": 1.1176989588847715e-05, + "loss": 1.946, + "step": 2500 + } + ], + "max_steps": 5667, + "num_train_epochs": 3, + "total_flos": 2642955329863680.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2500/training_args.bin b/checkpoint-2500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..edda9a4eabe29732a97cbf92cb19dfff789a056c --- /dev/null +++ b/checkpoint-2500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8 +size 3963 diff --git a/checkpoint-3000/config.json b/checkpoint-3000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd0ff4f9af6140e8d2f25f44915b34dc6d26f42 --- /dev/null +++ b/checkpoint-3000/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "T5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/checkpoint-3000/generation_config.json b/checkpoint-3000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..144e9968e1ce77001f065b1e01ce7d39571e3526 --- /dev/null +++ b/checkpoint-3000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.30.2" +} diff --git a/checkpoint-3000/optimizer.pt b/checkpoint-3000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..35604bb394adccc2cf7f1654c6a148879477f0af --- /dev/null +++ b/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b440251557d010731a799b2b1054ac12ac9ef3baba445d0342bcc95cc7a3a3d9 +size 2401526789 diff --git a/checkpoint-3000/pytorch_model.bin b/checkpoint-3000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..97f5ea8fa79109d75b411be6906182338f82db0b --- /dev/null +++ b/checkpoint-3000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ed3c39ec5a5bfc19af733c0442c5109470f6e4b8595cbb0b09fb2cea9d79af +size 1200772613 diff --git a/checkpoint-3000/rng_state.pth b/checkpoint-3000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..21467471356b9088d0ce56919d630577d61797cb --- /dev/null +++ b/checkpoint-3000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:939ebb481cc3c107a086817849ed472b19c0ffe17e14943d08ee61ed9339729b +size 14575 diff --git a/checkpoint-3000/scheduler.pt b/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b37b524070d444e44fbe9471f67d2f157a5200a4 --- /dev/null +++ b/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:328d3e4f068dad4335ced61e817465ec2336fbe70ef3923d7474e80b9e56c3f2 +size 627 diff --git a/checkpoint-3000/trainer_state.json b/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4574330ad641a91e1214be1a5a0af3062b98da69 --- /dev/null +++ b/checkpoint-3000/trainer_state.json @@ -0,0 +1,60 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5881418740074114, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.26, + "learning_rate": 1.8235397917769543e-05, + "loss": 21.3637, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 1.6470795835539088e-05, + "loss": 10.4706, + "step": 1000 + }, + { + "epoch": 0.79, + "learning_rate": 1.470619375330863e-05, + "loss": 5.7273, + "step": 1500 + }, + { + "epoch": 1.0, + "eval_loss": 1.1415340900421143, + "eval_runtime": 9.212, + "eval_samples_per_second": 182.262, + "eval_steps_per_second": 11.398, + "step": 1889 + }, + { + "epoch": 1.06, + "learning_rate": 1.2941591671078173e-05, + "loss": 3.094, + "step": 2000 + }, + { + "epoch": 1.32, + "learning_rate": 1.1176989588847715e-05, + "loss": 1.946, + "step": 2500 + }, + { + "epoch": 1.59, + "learning_rate": 9.41238750661726e-06, + "loss": 1.3926, + "step": 3000 + } + ], + "max_steps": 5667, + "num_train_epochs": 3, + "total_flos": 3171705020743680.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3000/training_args.bin b/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..edda9a4eabe29732a97cbf92cb19dfff789a056c --- /dev/null +++ b/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8 +size 3963 diff --git a/checkpoint-3500/config.json b/checkpoint-3500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd0ff4f9af6140e8d2f25f44915b34dc6d26f42 --- /dev/null +++ b/checkpoint-3500/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "T5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/checkpoint-3500/generation_config.json b/checkpoint-3500/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..144e9968e1ce77001f065b1e01ce7d39571e3526 --- /dev/null +++ b/checkpoint-3500/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.30.2" +} diff --git a/checkpoint-3500/optimizer.pt b/checkpoint-3500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a333095fefeb9c147e45dff4ff0b1579687ccbc3 --- /dev/null +++ b/checkpoint-3500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c333accc8e0fe0febadaec30a8be8bc1e7224019f54ca7a8c32a81bd6d46679 +size 2401526789 diff --git a/checkpoint-3500/pytorch_model.bin b/checkpoint-3500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c6f8af58854029980cc9cc3f824c6ccc0482fa8e --- /dev/null +++ b/checkpoint-3500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:14904e15689dd20acb1fcddb7cff99524cd14e848a15f4c7f80c7a0eaece250c +size 1200772613 diff --git a/checkpoint-3500/rng_state.pth b/checkpoint-3500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ea66ab07329ba09a671b40c602fbe6ebfed4232 --- /dev/null +++ b/checkpoint-3500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2df463cabe1c61d85e4b8c2b433630ac70d399fc7a7afb113dbd157c56e8342 +size 14575 diff --git a/checkpoint-3500/scheduler.pt b/checkpoint-3500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b6644701691316f39e426289e760f222ea2f1e2e --- /dev/null +++ b/checkpoint-3500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:529a4061e9ddf3e0cf9f741b46526bf6994e2540c2b2efdf50582f3498305bdf +size 627 diff --git a/checkpoint-3500/trainer_state.json b/checkpoint-3500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..54dc1770b25a298611efdd21c683e033a47bbf53 --- /dev/null +++ b/checkpoint-3500/trainer_state.json @@ -0,0 +1,66 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.8528321863419799, + "global_step": 3500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.26, + "learning_rate": 1.8235397917769543e-05, + "loss": 21.3637, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 1.6470795835539088e-05, + "loss": 10.4706, + "step": 1000 + }, + { + "epoch": 0.79, + "learning_rate": 1.470619375330863e-05, + "loss": 5.7273, + "step": 1500 + }, + { + "epoch": 1.0, + "eval_loss": 1.1415340900421143, + "eval_runtime": 9.212, + "eval_samples_per_second": 182.262, + "eval_steps_per_second": 11.398, + "step": 1889 + }, + { + "epoch": 1.06, + "learning_rate": 1.2941591671078173e-05, + "loss": 3.094, + "step": 2000 + }, + { + "epoch": 1.32, + "learning_rate": 1.1176989588847715e-05, + "loss": 1.946, + "step": 2500 + }, + { + "epoch": 1.59, + "learning_rate": 9.41238750661726e-06, + "loss": 1.3926, + "step": 3000 + }, + { + "epoch": 1.85, + "learning_rate": 7.6477854243868e-06, + "loss": 1.0576, + "step": 3500 + } + ], + "max_steps": 5667, + "num_train_epochs": 3, + "total_flos": 3700454711623680.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3500/training_args.bin b/checkpoint-3500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..edda9a4eabe29732a97cbf92cb19dfff789a056c --- /dev/null +++ b/checkpoint-3500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8 +size 3963 diff --git a/checkpoint-4000/config.json b/checkpoint-4000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd0ff4f9af6140e8d2f25f44915b34dc6d26f42 --- /dev/null +++ b/checkpoint-4000/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "T5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/checkpoint-4000/generation_config.json b/checkpoint-4000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..144e9968e1ce77001f065b1e01ce7d39571e3526 --- /dev/null +++ b/checkpoint-4000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.30.2" +} diff --git a/checkpoint-4000/optimizer.pt b/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..738bd75b48cf4568bff54a419623400cd8c6c821 --- /dev/null +++ b/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:222b5b1494fcc96da40374a0a7a3cadb3a234f23f350f6ab1d822d7230f83f83 +size 2401526789 diff --git a/checkpoint-4000/pytorch_model.bin b/checkpoint-4000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..7b677c503c4c15c618eb3ce5919e0911c7271454 --- /dev/null +++ b/checkpoint-4000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d347621be03bed7f866cf7c88b7e45f022ea0687804173d891b624d479a2e3d +size 1200772613 diff --git a/checkpoint-4000/rng_state.pth b/checkpoint-4000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1cca696978d0c830cda367180f0e0c0ae93e353e --- /dev/null +++ b/checkpoint-4000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7db3b203dea25305f0591dbcc099e992cbad972f441f2ba087edb646847992a4 +size 14575 diff --git a/checkpoint-4000/scheduler.pt b/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6d5349e5f1a39b37cc06eb4d46c8f25ecef8ce0d --- /dev/null +++ b/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a00bccc2359933b32e0cdc803f37fcf0963460b81a7907a97ccbef055e9e102c +size 627 diff --git a/checkpoint-4000/trainer_state.json b/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..833f10e290746699ce1ba679f227c7947bb5850b --- /dev/null +++ b/checkpoint-4000/trainer_state.json @@ -0,0 +1,80 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.1175224986765486, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.26, + "learning_rate": 1.8235397917769543e-05, + "loss": 21.3637, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 1.6470795835539088e-05, + "loss": 10.4706, + "step": 1000 + }, + { + "epoch": 0.79, + "learning_rate": 1.470619375330863e-05, + "loss": 5.7273, + "step": 1500 + }, + { + "epoch": 1.0, + "eval_loss": 1.1415340900421143, + "eval_runtime": 9.212, + "eval_samples_per_second": 182.262, + "eval_steps_per_second": 11.398, + "step": 1889 + }, + { + "epoch": 1.06, + "learning_rate": 1.2941591671078173e-05, + "loss": 3.094, + "step": 2000 + }, + { + "epoch": 1.32, + "learning_rate": 1.1176989588847715e-05, + "loss": 1.946, + "step": 2500 + }, + { + "epoch": 1.59, + "learning_rate": 9.41238750661726e-06, + "loss": 1.3926, + "step": 3000 + }, + { + "epoch": 1.85, + "learning_rate": 7.6477854243868e-06, + "loss": 1.0576, + "step": 3500 + }, + { + "epoch": 2.0, + "eval_loss": 0.2452467828989029, + "eval_runtime": 9.2184, + "eval_samples_per_second": 182.137, + "eval_steps_per_second": 11.39, + "step": 3778 + }, + { + "epoch": 2.12, + "learning_rate": 5.883183342156344e-06, + "loss": 0.8478, + "step": 4000 + } + ], + "max_steps": 5667, + "num_train_epochs": 3, + "total_flos": 4228411277967360.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4000/training_args.bin b/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..edda9a4eabe29732a97cbf92cb19dfff789a056c --- /dev/null +++ b/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8 +size 3963 diff --git a/checkpoint-4500/config.json b/checkpoint-4500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd0ff4f9af6140e8d2f25f44915b34dc6d26f42 --- /dev/null +++ b/checkpoint-4500/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "T5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/checkpoint-4500/generation_config.json b/checkpoint-4500/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..144e9968e1ce77001f065b1e01ce7d39571e3526 --- /dev/null +++ b/checkpoint-4500/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.30.2" +} diff --git a/checkpoint-4500/optimizer.pt b/checkpoint-4500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..337d14a71a42d60cd146a65cbc451a0bb8164633 --- /dev/null +++ b/checkpoint-4500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57290f0b116aef41455e9acd88a8ac041cb06b15526092712f8aeab964bebe27 +size 2401526789 diff --git a/checkpoint-4500/pytorch_model.bin b/checkpoint-4500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..37a31195b38b784f03a012faf40fefd55df47e14 --- /dev/null +++ b/checkpoint-4500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:181c3c1056fb6f7e91f823db4f4d3d2bbef2238f45beea642a50f35aa8961985 +size 1200772613 diff --git a/checkpoint-4500/rng_state.pth b/checkpoint-4500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1becc2e710592bd5c356d0bee39d5a3f7abe0e2c --- /dev/null +++ b/checkpoint-4500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd2978b1e62f9f5dd005ab8d6882ff6febc16303fde4a324749a7ef2b69ab7f8 +size 14575 diff --git a/checkpoint-4500/scheduler.pt b/checkpoint-4500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c96df8a7c92955f368116a4370496f65d1d9760c --- /dev/null +++ b/checkpoint-4500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9eec399b605ef1bba892e241027c18005a1a5d501cae99cafa2abcf861758fd8 +size 627 diff --git a/checkpoint-4500/trainer_state.json b/checkpoint-4500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8f323717d7416af751407129fa9f01fcfb2078ec --- /dev/null +++ b/checkpoint-4500/trainer_state.json @@ -0,0 +1,86 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.382212811011117, + "global_step": 4500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.26, + "learning_rate": 1.8235397917769543e-05, + "loss": 21.3637, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 1.6470795835539088e-05, + "loss": 10.4706, + "step": 1000 + }, + { + "epoch": 0.79, + "learning_rate": 1.470619375330863e-05, + "loss": 5.7273, + "step": 1500 + }, + { + "epoch": 1.0, + "eval_loss": 1.1415340900421143, + "eval_runtime": 9.212, + "eval_samples_per_second": 182.262, + "eval_steps_per_second": 11.398, + "step": 1889 + }, + { + "epoch": 1.06, + "learning_rate": 1.2941591671078173e-05, + "loss": 3.094, + "step": 2000 + }, + { + "epoch": 1.32, + "learning_rate": 1.1176989588847715e-05, + "loss": 1.946, + "step": 2500 + }, + { + "epoch": 1.59, + "learning_rate": 9.41238750661726e-06, + "loss": 1.3926, + "step": 3000 + }, + { + "epoch": 1.85, + "learning_rate": 7.6477854243868e-06, + "loss": 1.0576, + "step": 3500 + }, + { + "epoch": 2.0, + "eval_loss": 0.2452467828989029, + "eval_runtime": 9.2184, + "eval_samples_per_second": 182.137, + "eval_steps_per_second": 11.39, + "step": 3778 + }, + { + "epoch": 2.12, + "learning_rate": 5.883183342156344e-06, + "loss": 0.8478, + "step": 4000 + }, + { + "epoch": 2.38, + "learning_rate": 4.118581259925887e-06, + "loss": 0.71, + "step": 4500 + } + ], + "max_steps": 5667, + "num_train_epochs": 3, + "total_flos": 4757160968847360.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4500/training_args.bin b/checkpoint-4500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..edda9a4eabe29732a97cbf92cb19dfff789a056c --- /dev/null +++ b/checkpoint-4500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8 +size 3963 diff --git a/checkpoint-500/config.json b/checkpoint-500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd0ff4f9af6140e8d2f25f44915b34dc6d26f42 --- /dev/null +++ b/checkpoint-500/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "T5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/checkpoint-500/generation_config.json b/checkpoint-500/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..144e9968e1ce77001f065b1e01ce7d39571e3526 --- /dev/null +++ b/checkpoint-500/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.30.2" +} diff --git a/checkpoint-500/optimizer.pt b/checkpoint-500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ad99138a3259efdee75dd93333d0688254b73a15 --- /dev/null +++ b/checkpoint-500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd6577864e307a6a05850678437d1abdc077a5decf378465fb7148a642ad9fb2 +size 2401526789 diff --git a/checkpoint-500/pytorch_model.bin b/checkpoint-500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..da93b092d1828c1b8aea451d3c7ef94af1396e07 --- /dev/null +++ b/checkpoint-500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:181977916060064998068b555dc395120174a72152d43055f39e13294a2d288a +size 1200772613 diff --git a/checkpoint-500/rng_state.pth b/checkpoint-500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b8f99b6f29cd0ac51dc15127b23a498c409d6dd2 --- /dev/null +++ b/checkpoint-500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:acd9147173ef498be25967623b269bd1628d5cea087f2ff089e3f62f27e1908e +size 14575 diff --git a/checkpoint-500/scheduler.pt b/checkpoint-500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a129fcb90aa029a198b7402fe85f98de6d0c4f1a --- /dev/null +++ b/checkpoint-500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f6c65846f0497c382253218ace899bffd34751a8d7fc48c263a099b43b279cb6 +size 627 diff --git a/checkpoint-500/trainer_state.json b/checkpoint-500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..83c491506ddc1295b9655228385b9d722278dd6e --- /dev/null +++ b/checkpoint-500/trainer_state.json @@ -0,0 +1,22 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.2646903123345686, + "global_step": 500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.26, + "learning_rate": 1.8235397917769543e-05, + "loss": 21.3637, + "step": 500 + } + ], + "max_steps": 5667, + "num_train_epochs": 3, + "total_flos": 528749690880000.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-500/training_args.bin b/checkpoint-500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..edda9a4eabe29732a97cbf92cb19dfff789a056c --- /dev/null +++ b/checkpoint-500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8 +size 3963 diff --git a/checkpoint-5000/config.json b/checkpoint-5000/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd0ff4f9af6140e8d2f25f44915b34dc6d26f42 --- /dev/null +++ b/checkpoint-5000/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "T5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/checkpoint-5000/generation_config.json b/checkpoint-5000/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..144e9968e1ce77001f065b1e01ce7d39571e3526 --- /dev/null +++ b/checkpoint-5000/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.30.2" +} diff --git a/checkpoint-5000/optimizer.pt b/checkpoint-5000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9839bc7e100b905d3d81ea7cdcf03cc1ff59945 --- /dev/null +++ b/checkpoint-5000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1777ade5e105192dd547f94d17c41de6a5c4885e2181e5809febc13f883f395 +size 2401526789 diff --git a/checkpoint-5000/pytorch_model.bin b/checkpoint-5000/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..5e892bd76a48bcb4fba95a2e0366613d56663c97 --- /dev/null +++ b/checkpoint-5000/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7b8ee87aadf873c02572889c4dbeb87847afb24d7c5d27ff593f4d140e590a9 +size 1200772613 diff --git a/checkpoint-5000/rng_state.pth b/checkpoint-5000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..ed0b7411caf00de84fe58fa113d77e388d55a0e5 --- /dev/null +++ b/checkpoint-5000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3622d57dda77b018baf6f40417210e9d1abcbe70d72e8ba0e90beb63b9aaccc1 +size 14575 diff --git a/checkpoint-5000/scheduler.pt b/checkpoint-5000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dd15893a5235c2ba1b107d724e8788f2efa891ca --- /dev/null +++ b/checkpoint-5000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05b43a442b8dd6d3e1b81c3d0a4d3ee3cbf0b8b804fd8223686b56bc292048a6 +size 627 diff --git a/checkpoint-5000/trainer_state.json b/checkpoint-5000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a971af16dac4f11d48cfe4f2ee8c49543acc92cf --- /dev/null +++ b/checkpoint-5000/trainer_state.json @@ -0,0 +1,92 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.6469031233456857, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.26, + "learning_rate": 1.8235397917769543e-05, + "loss": 21.3637, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 1.6470795835539088e-05, + "loss": 10.4706, + "step": 1000 + }, + { + "epoch": 0.79, + "learning_rate": 1.470619375330863e-05, + "loss": 5.7273, + "step": 1500 + }, + { + "epoch": 1.0, + "eval_loss": 1.1415340900421143, + "eval_runtime": 9.212, + "eval_samples_per_second": 182.262, + "eval_steps_per_second": 11.398, + "step": 1889 + }, + { + "epoch": 1.06, + "learning_rate": 1.2941591671078173e-05, + "loss": 3.094, + "step": 2000 + }, + { + "epoch": 1.32, + "learning_rate": 1.1176989588847715e-05, + "loss": 1.946, + "step": 2500 + }, + { + "epoch": 1.59, + "learning_rate": 9.41238750661726e-06, + "loss": 1.3926, + "step": 3000 + }, + { + "epoch": 1.85, + "learning_rate": 7.6477854243868e-06, + "loss": 1.0576, + "step": 3500 + }, + { + "epoch": 2.0, + "eval_loss": 0.2452467828989029, + "eval_runtime": 9.2184, + "eval_samples_per_second": 182.137, + "eval_steps_per_second": 11.39, + "step": 3778 + }, + { + "epoch": 2.12, + "learning_rate": 5.883183342156344e-06, + "loss": 0.8478, + "step": 4000 + }, + { + "epoch": 2.38, + "learning_rate": 4.118581259925887e-06, + "loss": 0.71, + "step": 4500 + }, + { + "epoch": 2.65, + "learning_rate": 2.3539791776954297e-06, + "loss": 0.6478, + "step": 5000 + } + ], + "max_steps": 5667, + "num_train_epochs": 3, + "total_flos": 5285910659727360.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5000/training_args.bin b/checkpoint-5000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..edda9a4eabe29732a97cbf92cb19dfff789a056c --- /dev/null +++ b/checkpoint-5000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8 +size 3963 diff --git a/checkpoint-5500/config.json b/checkpoint-5500/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd0ff4f9af6140e8d2f25f44915b34dc6d26f42 --- /dev/null +++ b/checkpoint-5500/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "T5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/checkpoint-5500/generation_config.json b/checkpoint-5500/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..144e9968e1ce77001f065b1e01ce7d39571e3526 --- /dev/null +++ b/checkpoint-5500/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.30.2" +} diff --git a/checkpoint-5500/optimizer.pt b/checkpoint-5500/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..64b582f3ae654dffb0b699f8573335bc39bf6e19 --- /dev/null +++ b/checkpoint-5500/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0fd957c85a40c39a86c1d9e1e9b4549e248fe388b39379d203ae59e39eaecdaa +size 2401526789 diff --git a/checkpoint-5500/pytorch_model.bin b/checkpoint-5500/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..0b82ffcc999e13184b22d7a7cdf2174da47bd5f0 --- /dev/null +++ b/checkpoint-5500/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7c9cd7289b797ece737a9e88f248b8bb1d9adf6aa03408867075e686b66e32c +size 1200772613 diff --git a/checkpoint-5500/rng_state.pth b/checkpoint-5500/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b2788c60c0f1123b9c2c2b02bb5a005c44973ab9 --- /dev/null +++ b/checkpoint-5500/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebe6db2f42c450f855e5761f4eb3457398c6770188de8744993a0023efb811a6 +size 14575 diff --git a/checkpoint-5500/scheduler.pt b/checkpoint-5500/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..4d6750f63da54ae01f83942f3d3b1b1edbe8f500 --- /dev/null +++ b/checkpoint-5500/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3c77f359bf59caf50d63f8e3f30360a0189dfc37bec86fb669e90d24cc5cdfc +size 627 diff --git a/checkpoint-5500/trainer_state.json b/checkpoint-5500/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0933dc2551ad09fadc39a556457cde7d6c427def --- /dev/null +++ b/checkpoint-5500/trainer_state.json @@ -0,0 +1,98 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.911593435680254, + "global_step": 5500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.26, + "learning_rate": 1.8235397917769543e-05, + "loss": 21.3637, + "step": 500 + }, + { + "epoch": 0.53, + "learning_rate": 1.6470795835539088e-05, + "loss": 10.4706, + "step": 1000 + }, + { + "epoch": 0.79, + "learning_rate": 1.470619375330863e-05, + "loss": 5.7273, + "step": 1500 + }, + { + "epoch": 1.0, + "eval_loss": 1.1415340900421143, + "eval_runtime": 9.212, + "eval_samples_per_second": 182.262, + "eval_steps_per_second": 11.398, + "step": 1889 + }, + { + "epoch": 1.06, + "learning_rate": 1.2941591671078173e-05, + "loss": 3.094, + "step": 2000 + }, + { + "epoch": 1.32, + "learning_rate": 1.1176989588847715e-05, + "loss": 1.946, + "step": 2500 + }, + { + "epoch": 1.59, + "learning_rate": 9.41238750661726e-06, + "loss": 1.3926, + "step": 3000 + }, + { + "epoch": 1.85, + "learning_rate": 7.6477854243868e-06, + "loss": 1.0576, + "step": 3500 + }, + { + "epoch": 2.0, + "eval_loss": 0.2452467828989029, + "eval_runtime": 9.2184, + "eval_samples_per_second": 182.137, + "eval_steps_per_second": 11.39, + "step": 3778 + }, + { + "epoch": 2.12, + "learning_rate": 5.883183342156344e-06, + "loss": 0.8478, + "step": 4000 + }, + { + "epoch": 2.38, + "learning_rate": 4.118581259925887e-06, + "loss": 0.71, + "step": 4500 + }, + { + "epoch": 2.65, + "learning_rate": 2.3539791776954297e-06, + "loss": 0.6478, + "step": 5000 + }, + { + "epoch": 2.91, + "learning_rate": 5.893770954649726e-07, + "loss": 0.6182, + "step": 5500 + } + ], + "max_steps": 5667, + "num_train_epochs": 3, + "total_flos": 5814660350607360.0, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5500/training_args.bin b/checkpoint-5500/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..edda9a4eabe29732a97cbf92cb19dfff789a056c --- /dev/null +++ b/checkpoint-5500/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064d4303654366a9465079fa2f4055aa06f3490482be71543d158a2eea2ea5f8 +size 3963 diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..8fd0ff4f9af6140e8d2f25f44915b34dc6d26f42 --- /dev/null +++ b/config.json @@ -0,0 +1,31 @@ +{ + "_name_or_path": "google/mt5-small", + "architectures": [ + "T5ForConditionalGeneration" + ], + "d_ff": 1024, + "d_kv": 64, + "d_model": 512, + "decoder_start_token_id": 0, + "dense_act_fn": "gelu_new", + "dropout_rate": 0.1, + "eos_token_id": 1, + "feed_forward_proj": "gated-gelu", + "initializer_factor": 1.0, + "is_encoder_decoder": true, + "is_gated_act": true, + "layer_norm_epsilon": 1e-06, + "model_type": "t5", + "num_decoder_layers": 8, + "num_heads": 6, + "num_layers": 8, + "pad_token_id": 0, + "relative_attention_max_distance": 128, + "relative_attention_num_buckets": 32, + "tie_word_embeddings": false, + "tokenizer_class": "T5Tokenizer", + "torch_dtype": "float32", + "transformers_version": "4.30.2", + "use_cache": true, + "vocab_size": 250112 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..144e9968e1ce77001f065b1e01ce7d39571e3526 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,7 @@ +{ + "_from_model_config": true, + "decoder_start_token_id": 0, + "eos_token_id": 1, + "pad_token_id": 0, + "transformers_version": "4.30.2" +} diff --git a/pytorch_model.bin b/pytorch_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..718fbd111d7e637778e002188b6cfd942480729c --- /dev/null +++ b/pytorch_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e1a266840fbf50d37be2a0583dafad9d4db22b436b015d5e7eb9db9e18fddfe +size 1200772613 diff --git a/runs/Jan07_09-51-50_hendrixgpu22fl.unicph.domain/events.out.tfevents.1736239910.hendrixgpu22fl.unicph.domain.918642.0 b/runs/Jan07_09-51-50_hendrixgpu22fl.unicph.domain/events.out.tfevents.1736239910.hendrixgpu22fl.unicph.domain.918642.0 new file mode 100644 index 0000000000000000000000000000000000000000..26d5dbfd8ff03b4b8f2ca210bdd8e2bdc35f6856 --- /dev/null +++ b/runs/Jan07_09-51-50_hendrixgpu22fl.unicph.domain/events.out.tfevents.1736239910.hendrixgpu22fl.unicph.domain.918642.0 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c217dd599796d411681322106f04f99e8f79d5bd67474aa98e0aeb954f1ad87 +size 6993 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..79ae7ea5bf033de69d0055820c57885e3d377bbb --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,5 @@ +{ + "eos_token": "", + "pad_token": "", + "unk_token": "" +} diff --git a/spiece.model b/spiece.model new file mode 100644 index 0000000000000000000000000000000000000000..e417801865fd66bd40f9d45d46b6d0d0c2aa36b6 --- /dev/null +++ b/spiece.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6 +size 4309802 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..01286a9ca39760175875dfb7ba32dfa0e9cbff17 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,11 @@ +{ + "additional_special_tokens": null, + "clean_up_tokenization_spaces": true, + "eos_token": "", + "extra_ids": 0, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "T5Tokenizer", + "unk_token": "" +}