diff --git a/model.safetensors b/model.safetensors index ccb8c972557f85879fd6a14d6c3e55ea15530b7f..0213d4c88d411523a6dc3f79b97b864d86e09a77 100644 --- a/model.safetensors +++ b/model.safetensors @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4c0e48f30763fd0ec52154bdda6e91ed85ffc6fd3ed79f829fb02df5c50aa9fa +oid sha256:6bc896c70d6094782d316ed4ff72f355a62338eb7a17610edb8898dde5fe8c59 size 470641664 diff --git a/run-3/checkpoint-106/config.json b/run-3/checkpoint-106/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7 --- /dev/null +++ b/run-3/checkpoint-106/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "tokenizer_class": "XLMRobertaTokenizer", + "torch_dtype": "float32", + "transformers_version": "4.38.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 250037 +} diff --git a/run-3/checkpoint-106/model.safetensors b/run-3/checkpoint-106/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ccb8c972557f85879fd6a14d6c3e55ea15530b7f --- /dev/null +++ b/run-3/checkpoint-106/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c0e48f30763fd0ec52154bdda6e91ed85ffc6fd3ed79f829fb02df5c50aa9fa +size 470641664 diff --git a/run-3/checkpoint-106/optimizer.pt b/run-3/checkpoint-106/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..800539d2b9ca7097429919e651efad907ba9d9e1 --- /dev/null +++ b/run-3/checkpoint-106/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e1066f156de9416ec408f6a47e725f9a0e00846fb791e971dd5474ef15ba7ab +size 941404410 diff --git a/run-3/checkpoint-106/rng_state.pth b/run-3/checkpoint-106/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7f49578e92281a64ed42722f453f635f775c26fd --- /dev/null +++ b/run-3/checkpoint-106/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9461ce57ca642305c3edafd8c0e0cfa746276df6556c74321702a8635b1ffc1 +size 14180 diff --git a/run-3/checkpoint-106/scheduler.pt b/run-3/checkpoint-106/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f6292db65d584db96b9f01a6c24b756ad38e2d36 --- /dev/null +++ b/run-3/checkpoint-106/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e08949a8c9619e1178b5b8b3d8ff36b49af0be839f5c43e356640db1ae24bf2 +size 1064 diff --git a/run-3/checkpoint-106/sentencepiece.bpe.model b/run-3/checkpoint-106/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/run-3/checkpoint-106/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/run-3/checkpoint-106/special_tokens_map.json b/run-3/checkpoint-106/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/run-3/checkpoint-106/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/run-3/checkpoint-106/tokenizer.json b/run-3/checkpoint-106/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff --- /dev/null +++ b/run-3/checkpoint-106/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15 +size 17098273 diff --git a/run-3/checkpoint-106/tokenizer_config.json b/run-3/checkpoint-106/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453 --- /dev/null +++ b/run-3/checkpoint-106/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/run-3/checkpoint-106/trainer_state.json b/run-3/checkpoint-106/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..3273f10b679e53863e1606bcf8f9369bb1be9168 --- /dev/null +++ b/run-3/checkpoint-106/trainer_state.json @@ -0,0 +1,35 @@ +{ + "best_metric": 0.7993119266055045, + "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-3/checkpoint-106", + "epoch": 1.0, + "eval_steps": 500, + "global_step": 106, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy": 0.7993119266055045, + "eval_loss": 0.4503490626811981, + "eval_runtime": 0.9477, + "eval_samples_per_second": 920.118, + "eval_steps_per_second": 58.035, + "step": 106 + } + ], + "logging_steps": 500, + "max_steps": 424, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 500, + "total_flos": 0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "learning_rate": 2.3419555649158772e-05, + "num_train_epochs": 4, + "per_device_train_batch_size": 64, + "seed": 10 + } +} diff --git a/run-3/checkpoint-106/training_args.bin b/run-3/checkpoint-106/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47bcbba551c4a22f3fcb86da051ef0c061c72bee --- /dev/null +++ b/run-3/checkpoint-106/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae913b9d9917311801715473b973aed6b7064f0996bb6f6f0e3ab9ab897efb3e +size 4984 diff --git a/run-3/checkpoint-212/config.json b/run-3/checkpoint-212/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7 --- /dev/null +++ b/run-3/checkpoint-212/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "tokenizer_class": "XLMRobertaTokenizer", + "torch_dtype": "float32", + "transformers_version": "4.38.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 250037 +} diff --git a/run-3/checkpoint-212/model.safetensors b/run-3/checkpoint-212/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5c0a069bff8767d0a4b2392e6e75ab1ea9c28e87 --- /dev/null +++ b/run-3/checkpoint-212/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c18dc7de4e55089ad012e0cbd4d5998b2e9d5ad8b1ebbacf65813fb3e3ba447e +size 470641664 diff --git a/run-3/checkpoint-212/optimizer.pt b/run-3/checkpoint-212/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..ecf1582d96981816e072fda3445380abaed53912 --- /dev/null +++ b/run-3/checkpoint-212/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c0c1491054c487e3608157f3ae9903823d0a16bc915479d65af9231c53a726fa +size 941404410 diff --git a/run-3/checkpoint-212/rng_state.pth b/run-3/checkpoint-212/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd729eb1ac66d9869c99dabf8e06f9f8fe108721 --- /dev/null +++ b/run-3/checkpoint-212/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4aad64024c239e0abbb01ad3997703e20fef94d4867ddbd3a89f17b07d74ce20 +size 14180 diff --git a/run-3/checkpoint-212/scheduler.pt b/run-3/checkpoint-212/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ec69e2cf7a8a2afa8a1c7c578fbbc38a9d6ff14 --- /dev/null +++ b/run-3/checkpoint-212/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:00fa9134e8899ea0c6296021cb99f5036db8a95308bc03e8ab78743a7b8e3931 +size 1064 diff --git a/run-3/checkpoint-212/sentencepiece.bpe.model b/run-3/checkpoint-212/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/run-3/checkpoint-212/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/run-3/checkpoint-212/special_tokens_map.json b/run-3/checkpoint-212/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/run-3/checkpoint-212/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/run-3/checkpoint-212/tokenizer.json b/run-3/checkpoint-212/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff --- /dev/null +++ b/run-3/checkpoint-212/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15 +size 17098273 diff --git a/run-3/checkpoint-212/tokenizer_config.json b/run-3/checkpoint-212/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453 --- /dev/null +++ b/run-3/checkpoint-212/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/run-3/checkpoint-212/trainer_state.json b/run-3/checkpoint-212/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f580cfd2e15952c76e1b390a060249ca61d3464a --- /dev/null +++ b/run-3/checkpoint-212/trainer_state.json @@ -0,0 +1,44 @@ +{ + "best_metric": 0.8073394495412844, + "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-3/checkpoint-212", + "epoch": 2.0, + "eval_steps": 500, + "global_step": 212, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy": 0.7993119266055045, + "eval_loss": 0.4503490626811981, + "eval_runtime": 0.9477, + "eval_samples_per_second": 920.118, + "eval_steps_per_second": 58.035, + "step": 106 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8073394495412844, + "eval_loss": 0.43257269263267517, + "eval_runtime": 1.0392, + "eval_samples_per_second": 839.146, + "eval_steps_per_second": 52.928, + "step": 212 + } + ], + "logging_steps": 500, + "max_steps": 424, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 500, + "total_flos": 0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "learning_rate": 2.3419555649158772e-05, + "num_train_epochs": 4, + "per_device_train_batch_size": 64, + "seed": 10 + } +} diff --git a/run-3/checkpoint-212/training_args.bin b/run-3/checkpoint-212/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47bcbba551c4a22f3fcb86da051ef0c061c72bee --- /dev/null +++ b/run-3/checkpoint-212/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae913b9d9917311801715473b973aed6b7064f0996bb6f6f0e3ab9ab897efb3e +size 4984 diff --git a/run-3/checkpoint-318/config.json b/run-3/checkpoint-318/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7 --- /dev/null +++ b/run-3/checkpoint-318/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "tokenizer_class": "XLMRobertaTokenizer", + "torch_dtype": "float32", + "transformers_version": "4.38.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 250037 +} diff --git a/run-3/checkpoint-318/model.safetensors b/run-3/checkpoint-318/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ea83dca43671ca52704a47e8be4115a1b469dfa5 --- /dev/null +++ b/run-3/checkpoint-318/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b00986ec8e99ec9bfcd4ccdf742bdb4e2a30555988d621183287a83b45d2c73 +size 470641664 diff --git a/run-3/checkpoint-318/optimizer.pt b/run-3/checkpoint-318/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c661fef9b06835af59a40a3fcf6ff4e8d3c117b2 --- /dev/null +++ b/run-3/checkpoint-318/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63ab24a7117e8e6a90bcff53709902d9e3ed43c0d72105ff7537f850458bbef8 +size 941404410 diff --git a/run-3/checkpoint-318/rng_state.pth b/run-3/checkpoint-318/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bd6dc93fce1ba00813e859cf55af276bc62ed80f --- /dev/null +++ b/run-3/checkpoint-318/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e16d62d9a350bce2c01b07772d36050731efc5c4f5f31285fef551875e7831a0 +size 14180 diff --git a/run-3/checkpoint-318/scheduler.pt b/run-3/checkpoint-318/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..933b3793475cc0a298987763475699cc04286515 --- /dev/null +++ b/run-3/checkpoint-318/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8898f534b95e18f98550bf2084744a1bc461a6981327828253dbbb1ebe6ee9fa +size 1064 diff --git a/run-3/checkpoint-318/sentencepiece.bpe.model b/run-3/checkpoint-318/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/run-3/checkpoint-318/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/run-3/checkpoint-318/special_tokens_map.json b/run-3/checkpoint-318/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/run-3/checkpoint-318/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/run-3/checkpoint-318/tokenizer.json b/run-3/checkpoint-318/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff --- /dev/null +++ b/run-3/checkpoint-318/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15 +size 17098273 diff --git a/run-3/checkpoint-318/tokenizer_config.json b/run-3/checkpoint-318/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453 --- /dev/null +++ b/run-3/checkpoint-318/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/run-3/checkpoint-318/trainer_state.json b/run-3/checkpoint-318/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..6a07ac0a422a5f49999cbaf008e7eac6fddb2868 --- /dev/null +++ b/run-3/checkpoint-318/trainer_state.json @@ -0,0 +1,53 @@ +{ + "best_metric": 0.8337155963302753, + "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-3/checkpoint-318", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 318, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy": 0.7993119266055045, + "eval_loss": 0.4503490626811981, + "eval_runtime": 0.9477, + "eval_samples_per_second": 920.118, + "eval_steps_per_second": 58.035, + "step": 106 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8073394495412844, + "eval_loss": 0.43257269263267517, + "eval_runtime": 1.0392, + "eval_samples_per_second": 839.146, + "eval_steps_per_second": 52.928, + "step": 212 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.8337155963302753, + "eval_loss": 0.40033113956451416, + "eval_runtime": 1.028, + "eval_samples_per_second": 848.263, + "eval_steps_per_second": 53.503, + "step": 318 + } + ], + "logging_steps": 500, + "max_steps": 424, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 500, + "total_flos": 0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "learning_rate": 2.3419555649158772e-05, + "num_train_epochs": 4, + "per_device_train_batch_size": 64, + "seed": 10 + } +} diff --git a/run-3/checkpoint-318/training_args.bin b/run-3/checkpoint-318/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47bcbba551c4a22f3fcb86da051ef0c061c72bee --- /dev/null +++ b/run-3/checkpoint-318/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae913b9d9917311801715473b973aed6b7064f0996bb6f6f0e3ab9ab897efb3e +size 4984 diff --git a/run-3/checkpoint-424/config.json b/run-3/checkpoint-424/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7 --- /dev/null +++ b/run-3/checkpoint-424/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "tokenizer_class": "XLMRobertaTokenizer", + "torch_dtype": "float32", + "transformers_version": "4.38.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 250037 +} diff --git a/run-3/checkpoint-424/model.safetensors b/run-3/checkpoint-424/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c8674bdf6a66e7229d783fb0bd06c379aeeb86a7 --- /dev/null +++ b/run-3/checkpoint-424/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f94b9f076b895c457be18902125e8ff79d84c0baf6e46a1668e5ca3d2a31c1c +size 470641664 diff --git a/run-3/checkpoint-424/optimizer.pt b/run-3/checkpoint-424/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..94b963d9497b4e7ff99f38b5342781ea802ab3f1 --- /dev/null +++ b/run-3/checkpoint-424/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b4711cddda48f5d9ebde562a702e2b08e5941687c545a60992c5f5dcc4e6f7a +size 941404410 diff --git a/run-3/checkpoint-424/rng_state.pth b/run-3/checkpoint-424/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..03c9c5dc09cf6a88abd646a4b00ddafb3ad42abf --- /dev/null +++ b/run-3/checkpoint-424/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:71a57c760827965bd0c415b117690931ee01b7b4021094d1f430b5c8361d6467 +size 14180 diff --git a/run-3/checkpoint-424/scheduler.pt b/run-3/checkpoint-424/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..990f7f36ec40a249bc732b5c3218d09f15d5d285 --- /dev/null +++ b/run-3/checkpoint-424/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:964e8e0e3bf7dd21a33ba2eb4569d1ffee98f70360217372207afc2c1d1d5018 +size 1064 diff --git a/run-3/checkpoint-424/sentencepiece.bpe.model b/run-3/checkpoint-424/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/run-3/checkpoint-424/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/run-3/checkpoint-424/special_tokens_map.json b/run-3/checkpoint-424/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/run-3/checkpoint-424/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/run-3/checkpoint-424/tokenizer.json b/run-3/checkpoint-424/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff --- /dev/null +++ b/run-3/checkpoint-424/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15 +size 17098273 diff --git a/run-3/checkpoint-424/tokenizer_config.json b/run-3/checkpoint-424/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453 --- /dev/null +++ b/run-3/checkpoint-424/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/run-3/checkpoint-424/trainer_state.json b/run-3/checkpoint-424/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..405d2ab2f0b1f7185497c4bfe70de97e91250b9c --- /dev/null +++ b/run-3/checkpoint-424/trainer_state.json @@ -0,0 +1,62 @@ +{ + "best_metric": 0.8348623853211009, + "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-3/checkpoint-424", + "epoch": 4.0, + "eval_steps": 500, + "global_step": 424, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.0, + "eval_accuracy": 0.7993119266055045, + "eval_loss": 0.4503490626811981, + "eval_runtime": 0.9477, + "eval_samples_per_second": 920.118, + "eval_steps_per_second": 58.035, + "step": 106 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8073394495412844, + "eval_loss": 0.43257269263267517, + "eval_runtime": 1.0392, + "eval_samples_per_second": 839.146, + "eval_steps_per_second": 52.928, + "step": 212 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.8337155963302753, + "eval_loss": 0.40033113956451416, + "eval_runtime": 1.028, + "eval_samples_per_second": 848.263, + "eval_steps_per_second": 53.503, + "step": 318 + }, + { + "epoch": 4.0, + "eval_accuracy": 0.8348623853211009, + "eval_loss": 0.4028107821941376, + "eval_runtime": 1.2011, + "eval_samples_per_second": 726.009, + "eval_steps_per_second": 45.792, + "step": 424 + } + ], + "logging_steps": 500, + "max_steps": 424, + "num_input_tokens_seen": 0, + "num_train_epochs": 4, + "save_steps": 500, + "total_flos": 0, + "train_batch_size": 64, + "trial_name": null, + "trial_params": { + "learning_rate": 2.3419555649158772e-05, + "num_train_epochs": 4, + "per_device_train_batch_size": 64, + "seed": 10 + } +} diff --git a/run-3/checkpoint-424/training_args.bin b/run-3/checkpoint-424/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..47bcbba551c4a22f3fcb86da051ef0c061c72bee --- /dev/null +++ b/run-3/checkpoint-424/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae913b9d9917311801715473b973aed6b7064f0996bb6f6f0e3ab9ab897efb3e +size 4984 diff --git a/run-4/checkpoint-1684/config.json b/run-4/checkpoint-1684/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7 --- /dev/null +++ b/run-4/checkpoint-1684/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "tokenizer_class": "XLMRobertaTokenizer", + "torch_dtype": "float32", + "transformers_version": "4.38.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 250037 +} diff --git a/run-4/checkpoint-1684/model.safetensors b/run-4/checkpoint-1684/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8b421a1c2c9be40a4f23cc5c3b0d7fff487da185 --- /dev/null +++ b/run-4/checkpoint-1684/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4dc39d2132b33b4c6c39547e89bcf3368c78de7a7ace0433c0327aebd9b89887 +size 470641664 diff --git a/run-4/checkpoint-1684/optimizer.pt b/run-4/checkpoint-1684/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..d539bd5cc1dba8323d2818da49f31a8f0a8bfcef --- /dev/null +++ b/run-4/checkpoint-1684/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93f9e06421817aecd4158d12cdc51bfd3528093b145fd8ae22a448057c2201e4 +size 941404410 diff --git a/run-4/checkpoint-1684/rng_state.pth b/run-4/checkpoint-1684/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..9439ac8032790c4c44b1275476a07add00c1f6e7 --- /dev/null +++ b/run-4/checkpoint-1684/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c1ce78359f243cfa7302f5705f7e3ff4001b624a0e229968654129955cf4636 +size 14244 diff --git a/run-4/checkpoint-1684/scheduler.pt b/run-4/checkpoint-1684/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..930c464a07f04a988bb0dd1c5b5098b0add34371 --- /dev/null +++ b/run-4/checkpoint-1684/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36aa2b6b43cb7c4d638205aa6941a4bfdc273c2bfa008f9ac5ff98124d0e026a +size 1064 diff --git a/run-4/checkpoint-1684/sentencepiece.bpe.model b/run-4/checkpoint-1684/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/run-4/checkpoint-1684/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/run-4/checkpoint-1684/special_tokens_map.json b/run-4/checkpoint-1684/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/run-4/checkpoint-1684/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/run-4/checkpoint-1684/tokenizer.json b/run-4/checkpoint-1684/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff --- /dev/null +++ b/run-4/checkpoint-1684/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15 +size 17098273 diff --git a/run-4/checkpoint-1684/tokenizer_config.json b/run-4/checkpoint-1684/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453 --- /dev/null +++ b/run-4/checkpoint-1684/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/run-4/checkpoint-1684/trainer_state.json b/run-4/checkpoint-1684/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..679a4bcffa4166394bbca4f248827b28c8d05ed8 --- /dev/null +++ b/run-4/checkpoint-1684/trainer_state.json @@ -0,0 +1,56 @@ +{ + "best_metric": 0.823394495412844, + "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-4/checkpoint-1684", + "epoch": 1.0, + "eval_steps": 500, + "global_step": 1684, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.3, + "grad_norm": 207.84652709960938, + "learning_rate": 1.6360069774388828e-05, + "loss": 0.5581, + "step": 500 + }, + { + "epoch": 0.59, + "grad_norm": 1.0724189281463623, + "learning_rate": 1.4563049807957717e-05, + "loss": 0.5489, + "step": 1000 + }, + { + "epoch": 0.89, + "grad_norm": 2.0987462997436523, + "learning_rate": 1.2766029841526607e-05, + "loss": 0.5061, + "step": 1500 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.823394495412844, + "eval_loss": 0.4638556241989136, + "eval_runtime": 1.0938, + "eval_samples_per_second": 797.217, + "eval_steps_per_second": 50.283, + "step": 1684 + } + ], + "logging_steps": 500, + "max_steps": 5052, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 21648969825168.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 1.8157089740819938e-05, + "num_train_epochs": 3, + "per_device_train_batch_size": 4, + "seed": 35 + } +} diff --git a/run-4/checkpoint-1684/training_args.bin b/run-4/checkpoint-1684/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8e48306f0db4daa511e23a8d280034879985e67 --- /dev/null +++ b/run-4/checkpoint-1684/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6a6f9969049103e5698f1db819dd1df7a7e024a2e35a83b61a6fb789d6e160 +size 4984 diff --git a/run-4/checkpoint-3368/config.json b/run-4/checkpoint-3368/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7 --- /dev/null +++ b/run-4/checkpoint-3368/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "tokenizer_class": "XLMRobertaTokenizer", + "torch_dtype": "float32", + "transformers_version": "4.38.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 250037 +} diff --git a/run-4/checkpoint-3368/model.safetensors b/run-4/checkpoint-3368/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..bdbb41d949f7ef0b7d60eba5462cd743e7c4f68a --- /dev/null +++ b/run-4/checkpoint-3368/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a920204ecd53bd93d10e651560e02497ae635eb11e7469914e945b4a87ad141 +size 470641664 diff --git a/run-4/checkpoint-3368/optimizer.pt b/run-4/checkpoint-3368/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..92d25e47b43dd5282c025c0c129f6562c9ab923c --- /dev/null +++ b/run-4/checkpoint-3368/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a32629ebb40042e7312c84daa384b68ba7df7ec1d373a1dc74f36f1b93cd9208 +size 941404410 diff --git a/run-4/checkpoint-3368/rng_state.pth b/run-4/checkpoint-3368/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..1563db9409da7c33a95591816700da4bac5a97c8 --- /dev/null +++ b/run-4/checkpoint-3368/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dad14821ff48c37c8a36dfbc79c323a1ef6bcdb2b8e884d3f5044ddba5c12f78 +size 14244 diff --git a/run-4/checkpoint-3368/scheduler.pt b/run-4/checkpoint-3368/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..03a8b097a2cd7911982a3a480251af0026e7926c --- /dev/null +++ b/run-4/checkpoint-3368/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e5dd2d781a47db2d178ebea2c95d9162c1048f8a380d4575bda749bd805b29b2 +size 1064 diff --git a/run-4/checkpoint-3368/sentencepiece.bpe.model b/run-4/checkpoint-3368/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/run-4/checkpoint-3368/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/run-4/checkpoint-3368/special_tokens_map.json b/run-4/checkpoint-3368/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/run-4/checkpoint-3368/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/run-4/checkpoint-3368/tokenizer.json b/run-4/checkpoint-3368/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff --- /dev/null +++ b/run-4/checkpoint-3368/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15 +size 17098273 diff --git a/run-4/checkpoint-3368/tokenizer_config.json b/run-4/checkpoint-3368/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453 --- /dev/null +++ b/run-4/checkpoint-3368/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/run-4/checkpoint-3368/trainer_state.json b/run-4/checkpoint-3368/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b13f435db7876065628d9530c155e55c67fcd8ac --- /dev/null +++ b/run-4/checkpoint-3368/trainer_state.json @@ -0,0 +1,86 @@ +{ + "best_metric": 0.8279816513761468, + "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-4/checkpoint-3368", + "epoch": 2.0, + "eval_steps": 500, + "global_step": 3368, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.3, + "grad_norm": 207.84652709960938, + "learning_rate": 1.6360069774388828e-05, + "loss": 0.5581, + "step": 500 + }, + { + "epoch": 0.59, + "grad_norm": 1.0724189281463623, + "learning_rate": 1.4563049807957717e-05, + "loss": 0.5489, + "step": 1000 + }, + { + "epoch": 0.89, + "grad_norm": 2.0987462997436523, + "learning_rate": 1.2766029841526607e-05, + "loss": 0.5061, + "step": 1500 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.823394495412844, + "eval_loss": 0.4638556241989136, + "eval_runtime": 1.0938, + "eval_samples_per_second": 797.217, + "eval_steps_per_second": 50.283, + "step": 1684 + }, + { + "epoch": 1.19, + "grad_norm": 2.370238780975342, + "learning_rate": 1.0969009875095499e-05, + "loss": 0.458, + "step": 2000 + }, + { + "epoch": 1.48, + "grad_norm": 25.474912643432617, + "learning_rate": 9.171989908664387e-06, + "loss": 0.4448, + "step": 2500 + }, + { + "epoch": 1.78, + "grad_norm": 2.603041887283325, + "learning_rate": 7.374969942233276e-06, + "loss": 0.4212, + "step": 3000 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8279816513761468, + "eval_loss": 0.5386698842048645, + "eval_runtime": 1.024, + "eval_samples_per_second": 851.56, + "eval_steps_per_second": 53.711, + "step": 3368 + } + ], + "logging_steps": 500, + "max_steps": 5052, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 43392374384520.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 1.8157089740819938e-05, + "num_train_epochs": 3, + "per_device_train_batch_size": 4, + "seed": 35 + } +} diff --git a/run-4/checkpoint-3368/training_args.bin b/run-4/checkpoint-3368/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8e48306f0db4daa511e23a8d280034879985e67 --- /dev/null +++ b/run-4/checkpoint-3368/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6a6f9969049103e5698f1db819dd1df7a7e024a2e35a83b61a6fb789d6e160 +size 4984 diff --git a/run-4/checkpoint-5052/config.json b/run-4/checkpoint-5052/config.json new file mode 100644 index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7 --- /dev/null +++ b/run-4/checkpoint-5052/config.json @@ -0,0 +1,27 @@ +{ + "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384", + "architectures": [ + "BertForSequenceClassification" + ], + "attention_probs_dropout_prob": 0.1, + "classifier_dropout": null, + "hidden_act": "gelu", + "hidden_dropout_prob": 0.1, + "hidden_size": 384, + "initializer_range": 0.02, + "intermediate_size": 1536, + "layer_norm_eps": 1e-12, + "max_position_embeddings": 512, + "model_type": "bert", + "num_attention_heads": 12, + "num_hidden_layers": 12, + "pad_token_id": 0, + "position_embedding_type": "absolute", + "problem_type": "single_label_classification", + "tokenizer_class": "XLMRobertaTokenizer", + "torch_dtype": "float32", + "transformers_version": "4.38.2", + "type_vocab_size": 2, + "use_cache": true, + "vocab_size": 250037 +} diff --git a/run-4/checkpoint-5052/model.safetensors b/run-4/checkpoint-5052/model.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0213d4c88d411523a6dc3f79b97b864d86e09a77 --- /dev/null +++ b/run-4/checkpoint-5052/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bc896c70d6094782d316ed4ff72f355a62338eb7a17610edb8898dde5fe8c59 +size 470641664 diff --git a/run-4/checkpoint-5052/optimizer.pt b/run-4/checkpoint-5052/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..611cbb5bd8a6f3eadb7fa69156d740b98292946d --- /dev/null +++ b/run-4/checkpoint-5052/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6610967f4d9f60d2f8e29b3436f40329c89eb7448c48f2a4427bf1d5d853ca9e +size 941404410 diff --git a/run-4/checkpoint-5052/rng_state.pth b/run-4/checkpoint-5052/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..af482c0b5af19a576eca4d68bd5bedd69a2082ca --- /dev/null +++ b/run-4/checkpoint-5052/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45d43ebbd5c6e7468e158eea64220583e151933a83bc7c5839c0b2f74756eb6a +size 14244 diff --git a/run-4/checkpoint-5052/scheduler.pt b/run-4/checkpoint-5052/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..584ce4b843c683c2bdd7a2bea80dacc5895c6ef6 --- /dev/null +++ b/run-4/checkpoint-5052/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7d15262bf90300804a3bb308d82d1264af762cf780cf2b47dbd486308f9d12b +size 1064 diff --git a/run-4/checkpoint-5052/sentencepiece.bpe.model b/run-4/checkpoint-5052/sentencepiece.bpe.model new file mode 100644 index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c --- /dev/null +++ b/run-4/checkpoint-5052/sentencepiece.bpe.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865 +size 5069051 diff --git a/run-4/checkpoint-5052/special_tokens_map.json b/run-4/checkpoint-5052/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e --- /dev/null +++ b/run-4/checkpoint-5052/special_tokens_map.json @@ -0,0 +1,15 @@ +{ + "bos_token": "", + "cls_token": "", + "eos_token": "", + "mask_token": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "", + "sep_token": "", + "unk_token": "" +} diff --git a/run-4/checkpoint-5052/tokenizer.json b/run-4/checkpoint-5052/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff --- /dev/null +++ b/run-4/checkpoint-5052/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15 +size 17098273 diff --git a/run-4/checkpoint-5052/tokenizer_config.json b/run-4/checkpoint-5052/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453 --- /dev/null +++ b/run-4/checkpoint-5052/tokenizer_config.json @@ -0,0 +1,55 @@ +{ + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "3": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "250001": { + "content": "", + "lstrip": true, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "bos_token": "", + "clean_up_tokenization_spaces": true, + "cls_token": "", + "eos_token": "", + "mask_token": "", + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "sep_token": "", + "sp_model_kwargs": {}, + "tokenizer_class": "XLMRobertaTokenizer", + "unk_token": "" +} diff --git a/run-4/checkpoint-5052/trainer_state.json b/run-4/checkpoint-5052/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..878c12ee77b0ff791d4f2355c0b206a0cbedccf4 --- /dev/null +++ b/run-4/checkpoint-5052/trainer_state.json @@ -0,0 +1,123 @@ +{ + "best_metric": 0.8371559633027523, + "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-4/checkpoint-5052", + "epoch": 3.0, + "eval_steps": 500, + "global_step": 5052, + "is_hyper_param_search": true, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.3, + "grad_norm": 207.84652709960938, + "learning_rate": 1.6360069774388828e-05, + "loss": 0.5581, + "step": 500 + }, + { + "epoch": 0.59, + "grad_norm": 1.0724189281463623, + "learning_rate": 1.4563049807957717e-05, + "loss": 0.5489, + "step": 1000 + }, + { + "epoch": 0.89, + "grad_norm": 2.0987462997436523, + "learning_rate": 1.2766029841526607e-05, + "loss": 0.5061, + "step": 1500 + }, + { + "epoch": 1.0, + "eval_accuracy": 0.823394495412844, + "eval_loss": 0.4638556241989136, + "eval_runtime": 1.0938, + "eval_samples_per_second": 797.217, + "eval_steps_per_second": 50.283, + "step": 1684 + }, + { + "epoch": 1.19, + "grad_norm": 2.370238780975342, + "learning_rate": 1.0969009875095499e-05, + "loss": 0.458, + "step": 2000 + }, + { + "epoch": 1.48, + "grad_norm": 25.474912643432617, + "learning_rate": 9.171989908664387e-06, + "loss": 0.4448, + "step": 2500 + }, + { + "epoch": 1.78, + "grad_norm": 2.603041887283325, + "learning_rate": 7.374969942233276e-06, + "loss": 0.4212, + "step": 3000 + }, + { + "epoch": 2.0, + "eval_accuracy": 0.8279816513761468, + "eval_loss": 0.5386698842048645, + "eval_runtime": 1.024, + "eval_samples_per_second": 851.56, + "eval_steps_per_second": 53.711, + "step": 3368 + }, + { + "epoch": 2.08, + "grad_norm": 3.378201484680176, + "learning_rate": 5.5779499758021666e-06, + "loss": 0.4011, + "step": 3500 + }, + { + "epoch": 2.38, + "grad_norm": 0.8383808135986328, + "learning_rate": 3.780930009371056e-06, + "loss": 0.3478, + "step": 4000 + }, + { + "epoch": 2.67, + "grad_norm": 23.079736709594727, + "learning_rate": 1.9839100429399457e-06, + "loss": 0.3534, + "step": 4500 + }, + { + "epoch": 2.97, + "grad_norm": 0.36013078689575195, + "learning_rate": 1.8689007650883547e-07, + "loss": 0.3323, + "step": 5000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.8371559633027523, + "eval_loss": 0.5858107805252075, + "eval_runtime": 0.9203, + "eval_samples_per_second": 947.555, + "eval_steps_per_second": 59.765, + "step": 5052 + } + ], + "logging_steps": 500, + "max_steps": 5052, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 72166406306568.0, + "train_batch_size": 4, + "trial_name": null, + "trial_params": { + "learning_rate": 1.8157089740819938e-05, + "num_train_epochs": 3, + "per_device_train_batch_size": 4, + "seed": 35 + } +} diff --git a/run-4/checkpoint-5052/training_args.bin b/run-4/checkpoint-5052/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8e48306f0db4daa511e23a8d280034879985e67 --- /dev/null +++ b/run-4/checkpoint-5052/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f6a6f9969049103e5698f1db819dd1df7a7e024a2e35a83b61a6fb789d6e160 +size 4984 diff --git a/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709639319.0db4763e2117.1712.6 b/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709639319.0db4763e2117.1712.6 new file mode 100644 index 0000000000000000000000000000000000000000..b82dd04d770c5c032c3af4f677bb797b7b016c9a --- /dev/null +++ b/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709639319.0db4763e2117.1712.6 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdca43a7dc403770bff90d5950037a32cf18bf1763fcce826b9ae8fd38b7b16c +size 8103 diff --git a/training_args.bin b/training_args.bin index 47bcbba551c4a22f3fcb86da051ef0c061c72bee..a8e48306f0db4daa511e23a8d280034879985e67 100644 --- a/training_args.bin +++ b/training_args.bin @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ae913b9d9917311801715473b973aed6b7064f0996bb6f6f0e3ab9ab897efb3e +oid sha256:1f6a6f9969049103e5698f1db819dd1df7a7e024a2e35a83b61a6fb789d6e160 size 4984