diff --git a/model.safetensors b/model.safetensors
index ccb8c972557f85879fd6a14d6c3e55ea15530b7f..0213d4c88d411523a6dc3f79b97b864d86e09a77 100644
--- a/model.safetensors
+++ b/model.safetensors
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:4c0e48f30763fd0ec52154bdda6e91ed85ffc6fd3ed79f829fb02df5c50aa9fa
+oid sha256:6bc896c70d6094782d316ed4ff72f355a62338eb7a17610edb8898dde5fe8c59
size 470641664
diff --git a/run-3/checkpoint-106/config.json b/run-3/checkpoint-106/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-3/checkpoint-106/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-3/checkpoint-106/model.safetensors b/run-3/checkpoint-106/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ccb8c972557f85879fd6a14d6c3e55ea15530b7f
--- /dev/null
+++ b/run-3/checkpoint-106/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4c0e48f30763fd0ec52154bdda6e91ed85ffc6fd3ed79f829fb02df5c50aa9fa
+size 470641664
diff --git a/run-3/checkpoint-106/optimizer.pt b/run-3/checkpoint-106/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..800539d2b9ca7097429919e651efad907ba9d9e1
--- /dev/null
+++ b/run-3/checkpoint-106/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e1066f156de9416ec408f6a47e725f9a0e00846fb791e971dd5474ef15ba7ab
+size 941404410
diff --git a/run-3/checkpoint-106/rng_state.pth b/run-3/checkpoint-106/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..7f49578e92281a64ed42722f453f635f775c26fd
--- /dev/null
+++ b/run-3/checkpoint-106/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9461ce57ca642305c3edafd8c0e0cfa746276df6556c74321702a8635b1ffc1
+size 14180
diff --git a/run-3/checkpoint-106/scheduler.pt b/run-3/checkpoint-106/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..f6292db65d584db96b9f01a6c24b756ad38e2d36
--- /dev/null
+++ b/run-3/checkpoint-106/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e08949a8c9619e1178b5b8b3d8ff36b49af0be839f5c43e356640db1ae24bf2
+size 1064
diff --git a/run-3/checkpoint-106/sentencepiece.bpe.model b/run-3/checkpoint-106/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-3/checkpoint-106/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-3/checkpoint-106/special_tokens_map.json b/run-3/checkpoint-106/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-3/checkpoint-106/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-3/checkpoint-106/tokenizer.json b/run-3/checkpoint-106/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-3/checkpoint-106/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-3/checkpoint-106/tokenizer_config.json b/run-3/checkpoint-106/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-3/checkpoint-106/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-3/checkpoint-106/trainer_state.json b/run-3/checkpoint-106/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..3273f10b679e53863e1606bcf8f9369bb1be9168
--- /dev/null
+++ b/run-3/checkpoint-106/trainer_state.json
@@ -0,0 +1,35 @@
+{
+ "best_metric": 0.7993119266055045,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-3/checkpoint-106",
+ "epoch": 1.0,
+ "eval_steps": 500,
+ "global_step": 106,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.7993119266055045,
+ "eval_loss": 0.4503490626811981,
+ "eval_runtime": 0.9477,
+ "eval_samples_per_second": 920.118,
+ "eval_steps_per_second": 58.035,
+ "step": 106
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 424,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 500,
+ "total_flos": 0,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 2.3419555649158772e-05,
+ "num_train_epochs": 4,
+ "per_device_train_batch_size": 64,
+ "seed": 10
+ }
+}
diff --git a/run-3/checkpoint-106/training_args.bin b/run-3/checkpoint-106/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..47bcbba551c4a22f3fcb86da051ef0c061c72bee
--- /dev/null
+++ b/run-3/checkpoint-106/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae913b9d9917311801715473b973aed6b7064f0996bb6f6f0e3ab9ab897efb3e
+size 4984
diff --git a/run-3/checkpoint-212/config.json b/run-3/checkpoint-212/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-3/checkpoint-212/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-3/checkpoint-212/model.safetensors b/run-3/checkpoint-212/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5c0a069bff8767d0a4b2392e6e75ab1ea9c28e87
--- /dev/null
+++ b/run-3/checkpoint-212/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c18dc7de4e55089ad012e0cbd4d5998b2e9d5ad8b1ebbacf65813fb3e3ba447e
+size 470641664
diff --git a/run-3/checkpoint-212/optimizer.pt b/run-3/checkpoint-212/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..ecf1582d96981816e072fda3445380abaed53912
--- /dev/null
+++ b/run-3/checkpoint-212/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c0c1491054c487e3608157f3ae9903823d0a16bc915479d65af9231c53a726fa
+size 941404410
diff --git a/run-3/checkpoint-212/rng_state.pth b/run-3/checkpoint-212/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..dd729eb1ac66d9869c99dabf8e06f9f8fe108721
--- /dev/null
+++ b/run-3/checkpoint-212/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4aad64024c239e0abbb01ad3997703e20fef94d4867ddbd3a89f17b07d74ce20
+size 14180
diff --git a/run-3/checkpoint-212/scheduler.pt b/run-3/checkpoint-212/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..1ec69e2cf7a8a2afa8a1c7c578fbbc38a9d6ff14
--- /dev/null
+++ b/run-3/checkpoint-212/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:00fa9134e8899ea0c6296021cb99f5036db8a95308bc03e8ab78743a7b8e3931
+size 1064
diff --git a/run-3/checkpoint-212/sentencepiece.bpe.model b/run-3/checkpoint-212/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-3/checkpoint-212/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-3/checkpoint-212/special_tokens_map.json b/run-3/checkpoint-212/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-3/checkpoint-212/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-3/checkpoint-212/tokenizer.json b/run-3/checkpoint-212/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-3/checkpoint-212/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-3/checkpoint-212/tokenizer_config.json b/run-3/checkpoint-212/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-3/checkpoint-212/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-3/checkpoint-212/trainer_state.json b/run-3/checkpoint-212/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..f580cfd2e15952c76e1b390a060249ca61d3464a
--- /dev/null
+++ b/run-3/checkpoint-212/trainer_state.json
@@ -0,0 +1,44 @@
+{
+ "best_metric": 0.8073394495412844,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-3/checkpoint-212",
+ "epoch": 2.0,
+ "eval_steps": 500,
+ "global_step": 212,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.7993119266055045,
+ "eval_loss": 0.4503490626811981,
+ "eval_runtime": 0.9477,
+ "eval_samples_per_second": 920.118,
+ "eval_steps_per_second": 58.035,
+ "step": 106
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.8073394495412844,
+ "eval_loss": 0.43257269263267517,
+ "eval_runtime": 1.0392,
+ "eval_samples_per_second": 839.146,
+ "eval_steps_per_second": 52.928,
+ "step": 212
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 424,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 500,
+ "total_flos": 0,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 2.3419555649158772e-05,
+ "num_train_epochs": 4,
+ "per_device_train_batch_size": 64,
+ "seed": 10
+ }
+}
diff --git a/run-3/checkpoint-212/training_args.bin b/run-3/checkpoint-212/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..47bcbba551c4a22f3fcb86da051ef0c061c72bee
--- /dev/null
+++ b/run-3/checkpoint-212/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae913b9d9917311801715473b973aed6b7064f0996bb6f6f0e3ab9ab897efb3e
+size 4984
diff --git a/run-3/checkpoint-318/config.json b/run-3/checkpoint-318/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-3/checkpoint-318/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-3/checkpoint-318/model.safetensors b/run-3/checkpoint-318/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ea83dca43671ca52704a47e8be4115a1b469dfa5
--- /dev/null
+++ b/run-3/checkpoint-318/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0b00986ec8e99ec9bfcd4ccdf742bdb4e2a30555988d621183287a83b45d2c73
+size 470641664
diff --git a/run-3/checkpoint-318/optimizer.pt b/run-3/checkpoint-318/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..c661fef9b06835af59a40a3fcf6ff4e8d3c117b2
--- /dev/null
+++ b/run-3/checkpoint-318/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63ab24a7117e8e6a90bcff53709902d9e3ed43c0d72105ff7537f850458bbef8
+size 941404410
diff --git a/run-3/checkpoint-318/rng_state.pth b/run-3/checkpoint-318/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..bd6dc93fce1ba00813e859cf55af276bc62ed80f
--- /dev/null
+++ b/run-3/checkpoint-318/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e16d62d9a350bce2c01b07772d36050731efc5c4f5f31285fef551875e7831a0
+size 14180
diff --git a/run-3/checkpoint-318/scheduler.pt b/run-3/checkpoint-318/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..933b3793475cc0a298987763475699cc04286515
--- /dev/null
+++ b/run-3/checkpoint-318/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8898f534b95e18f98550bf2084744a1bc461a6981327828253dbbb1ebe6ee9fa
+size 1064
diff --git a/run-3/checkpoint-318/sentencepiece.bpe.model b/run-3/checkpoint-318/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-3/checkpoint-318/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-3/checkpoint-318/special_tokens_map.json b/run-3/checkpoint-318/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-3/checkpoint-318/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-3/checkpoint-318/tokenizer.json b/run-3/checkpoint-318/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-3/checkpoint-318/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-3/checkpoint-318/tokenizer_config.json b/run-3/checkpoint-318/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-3/checkpoint-318/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-3/checkpoint-318/trainer_state.json b/run-3/checkpoint-318/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..6a07ac0a422a5f49999cbaf008e7eac6fddb2868
--- /dev/null
+++ b/run-3/checkpoint-318/trainer_state.json
@@ -0,0 +1,53 @@
+{
+ "best_metric": 0.8337155963302753,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-3/checkpoint-318",
+ "epoch": 3.0,
+ "eval_steps": 500,
+ "global_step": 318,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.7993119266055045,
+ "eval_loss": 0.4503490626811981,
+ "eval_runtime": 0.9477,
+ "eval_samples_per_second": 920.118,
+ "eval_steps_per_second": 58.035,
+ "step": 106
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.8073394495412844,
+ "eval_loss": 0.43257269263267517,
+ "eval_runtime": 1.0392,
+ "eval_samples_per_second": 839.146,
+ "eval_steps_per_second": 52.928,
+ "step": 212
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.8337155963302753,
+ "eval_loss": 0.40033113956451416,
+ "eval_runtime": 1.028,
+ "eval_samples_per_second": 848.263,
+ "eval_steps_per_second": 53.503,
+ "step": 318
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 424,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 500,
+ "total_flos": 0,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 2.3419555649158772e-05,
+ "num_train_epochs": 4,
+ "per_device_train_batch_size": 64,
+ "seed": 10
+ }
+}
diff --git a/run-3/checkpoint-318/training_args.bin b/run-3/checkpoint-318/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..47bcbba551c4a22f3fcb86da051ef0c061c72bee
--- /dev/null
+++ b/run-3/checkpoint-318/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae913b9d9917311801715473b973aed6b7064f0996bb6f6f0e3ab9ab897efb3e
+size 4984
diff --git a/run-3/checkpoint-424/config.json b/run-3/checkpoint-424/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-3/checkpoint-424/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-3/checkpoint-424/model.safetensors b/run-3/checkpoint-424/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c8674bdf6a66e7229d783fb0bd06c379aeeb86a7
--- /dev/null
+++ b/run-3/checkpoint-424/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f94b9f076b895c457be18902125e8ff79d84c0baf6e46a1668e5ca3d2a31c1c
+size 470641664
diff --git a/run-3/checkpoint-424/optimizer.pt b/run-3/checkpoint-424/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..94b963d9497b4e7ff99f38b5342781ea802ab3f1
--- /dev/null
+++ b/run-3/checkpoint-424/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b4711cddda48f5d9ebde562a702e2b08e5941687c545a60992c5f5dcc4e6f7a
+size 941404410
diff --git a/run-3/checkpoint-424/rng_state.pth b/run-3/checkpoint-424/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..03c9c5dc09cf6a88abd646a4b00ddafb3ad42abf
--- /dev/null
+++ b/run-3/checkpoint-424/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:71a57c760827965bd0c415b117690931ee01b7b4021094d1f430b5c8361d6467
+size 14180
diff --git a/run-3/checkpoint-424/scheduler.pt b/run-3/checkpoint-424/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..990f7f36ec40a249bc732b5c3218d09f15d5d285
--- /dev/null
+++ b/run-3/checkpoint-424/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:964e8e0e3bf7dd21a33ba2eb4569d1ffee98f70360217372207afc2c1d1d5018
+size 1064
diff --git a/run-3/checkpoint-424/sentencepiece.bpe.model b/run-3/checkpoint-424/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-3/checkpoint-424/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-3/checkpoint-424/special_tokens_map.json b/run-3/checkpoint-424/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-3/checkpoint-424/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-3/checkpoint-424/tokenizer.json b/run-3/checkpoint-424/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-3/checkpoint-424/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-3/checkpoint-424/tokenizer_config.json b/run-3/checkpoint-424/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-3/checkpoint-424/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-3/checkpoint-424/trainer_state.json b/run-3/checkpoint-424/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..405d2ab2f0b1f7185497c4bfe70de97e91250b9c
--- /dev/null
+++ b/run-3/checkpoint-424/trainer_state.json
@@ -0,0 +1,62 @@
+{
+ "best_metric": 0.8348623853211009,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-3/checkpoint-424",
+ "epoch": 4.0,
+ "eval_steps": 500,
+ "global_step": 424,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.7993119266055045,
+ "eval_loss": 0.4503490626811981,
+ "eval_runtime": 0.9477,
+ "eval_samples_per_second": 920.118,
+ "eval_steps_per_second": 58.035,
+ "step": 106
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.8073394495412844,
+ "eval_loss": 0.43257269263267517,
+ "eval_runtime": 1.0392,
+ "eval_samples_per_second": 839.146,
+ "eval_steps_per_second": 52.928,
+ "step": 212
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.8337155963302753,
+ "eval_loss": 0.40033113956451416,
+ "eval_runtime": 1.028,
+ "eval_samples_per_second": 848.263,
+ "eval_steps_per_second": 53.503,
+ "step": 318
+ },
+ {
+ "epoch": 4.0,
+ "eval_accuracy": 0.8348623853211009,
+ "eval_loss": 0.4028107821941376,
+ "eval_runtime": 1.2011,
+ "eval_samples_per_second": 726.009,
+ "eval_steps_per_second": 45.792,
+ "step": 424
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 424,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 4,
+ "save_steps": 500,
+ "total_flos": 0,
+ "train_batch_size": 64,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 2.3419555649158772e-05,
+ "num_train_epochs": 4,
+ "per_device_train_batch_size": 64,
+ "seed": 10
+ }
+}
diff --git a/run-3/checkpoint-424/training_args.bin b/run-3/checkpoint-424/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..47bcbba551c4a22f3fcb86da051ef0c061c72bee
--- /dev/null
+++ b/run-3/checkpoint-424/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae913b9d9917311801715473b973aed6b7064f0996bb6f6f0e3ab9ab897efb3e
+size 4984
diff --git a/run-4/checkpoint-1684/config.json b/run-4/checkpoint-1684/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-4/checkpoint-1684/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-4/checkpoint-1684/model.safetensors b/run-4/checkpoint-1684/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8b421a1c2c9be40a4f23cc5c3b0d7fff487da185
--- /dev/null
+++ b/run-4/checkpoint-1684/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4dc39d2132b33b4c6c39547e89bcf3368c78de7a7ace0433c0327aebd9b89887
+size 470641664
diff --git a/run-4/checkpoint-1684/optimizer.pt b/run-4/checkpoint-1684/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..d539bd5cc1dba8323d2818da49f31a8f0a8bfcef
--- /dev/null
+++ b/run-4/checkpoint-1684/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93f9e06421817aecd4158d12cdc51bfd3528093b145fd8ae22a448057c2201e4
+size 941404410
diff --git a/run-4/checkpoint-1684/rng_state.pth b/run-4/checkpoint-1684/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..9439ac8032790c4c44b1275476a07add00c1f6e7
--- /dev/null
+++ b/run-4/checkpoint-1684/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c1ce78359f243cfa7302f5705f7e3ff4001b624a0e229968654129955cf4636
+size 14244
diff --git a/run-4/checkpoint-1684/scheduler.pt b/run-4/checkpoint-1684/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..930c464a07f04a988bb0dd1c5b5098b0add34371
--- /dev/null
+++ b/run-4/checkpoint-1684/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36aa2b6b43cb7c4d638205aa6941a4bfdc273c2bfa008f9ac5ff98124d0e026a
+size 1064
diff --git a/run-4/checkpoint-1684/sentencepiece.bpe.model b/run-4/checkpoint-1684/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-4/checkpoint-1684/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-4/checkpoint-1684/special_tokens_map.json b/run-4/checkpoint-1684/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-4/checkpoint-1684/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-4/checkpoint-1684/tokenizer.json b/run-4/checkpoint-1684/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-4/checkpoint-1684/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-4/checkpoint-1684/tokenizer_config.json b/run-4/checkpoint-1684/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-4/checkpoint-1684/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-4/checkpoint-1684/trainer_state.json b/run-4/checkpoint-1684/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..679a4bcffa4166394bbca4f248827b28c8d05ed8
--- /dev/null
+++ b/run-4/checkpoint-1684/trainer_state.json
@@ -0,0 +1,56 @@
+{
+ "best_metric": 0.823394495412844,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-4/checkpoint-1684",
+ "epoch": 1.0,
+ "eval_steps": 500,
+ "global_step": 1684,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.3,
+ "grad_norm": 207.84652709960938,
+ "learning_rate": 1.6360069774388828e-05,
+ "loss": 0.5581,
+ "step": 500
+ },
+ {
+ "epoch": 0.59,
+ "grad_norm": 1.0724189281463623,
+ "learning_rate": 1.4563049807957717e-05,
+ "loss": 0.5489,
+ "step": 1000
+ },
+ {
+ "epoch": 0.89,
+ "grad_norm": 2.0987462997436523,
+ "learning_rate": 1.2766029841526607e-05,
+ "loss": 0.5061,
+ "step": 1500
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.823394495412844,
+ "eval_loss": 0.4638556241989136,
+ "eval_runtime": 1.0938,
+ "eval_samples_per_second": 797.217,
+ "eval_steps_per_second": 50.283,
+ "step": 1684
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 5052,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 500,
+ "total_flos": 21648969825168.0,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.8157089740819938e-05,
+ "num_train_epochs": 3,
+ "per_device_train_batch_size": 4,
+ "seed": 35
+ }
+}
diff --git a/run-4/checkpoint-1684/training_args.bin b/run-4/checkpoint-1684/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8e48306f0db4daa511e23a8d280034879985e67
--- /dev/null
+++ b/run-4/checkpoint-1684/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f6a6f9969049103e5698f1db819dd1df7a7e024a2e35a83b61a6fb789d6e160
+size 4984
diff --git a/run-4/checkpoint-3368/config.json b/run-4/checkpoint-3368/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-4/checkpoint-3368/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-4/checkpoint-3368/model.safetensors b/run-4/checkpoint-3368/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..bdbb41d949f7ef0b7d60eba5462cd743e7c4f68a
--- /dev/null
+++ b/run-4/checkpoint-3368/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a920204ecd53bd93d10e651560e02497ae635eb11e7469914e945b4a87ad141
+size 470641664
diff --git a/run-4/checkpoint-3368/optimizer.pt b/run-4/checkpoint-3368/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..92d25e47b43dd5282c025c0c129f6562c9ab923c
--- /dev/null
+++ b/run-4/checkpoint-3368/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a32629ebb40042e7312c84daa384b68ba7df7ec1d373a1dc74f36f1b93cd9208
+size 941404410
diff --git a/run-4/checkpoint-3368/rng_state.pth b/run-4/checkpoint-3368/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..1563db9409da7c33a95591816700da4bac5a97c8
--- /dev/null
+++ b/run-4/checkpoint-3368/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dad14821ff48c37c8a36dfbc79c323a1ef6bcdb2b8e884d3f5044ddba5c12f78
+size 14244
diff --git a/run-4/checkpoint-3368/scheduler.pt b/run-4/checkpoint-3368/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..03a8b097a2cd7911982a3a480251af0026e7926c
--- /dev/null
+++ b/run-4/checkpoint-3368/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5dd2d781a47db2d178ebea2c95d9162c1048f8a380d4575bda749bd805b29b2
+size 1064
diff --git a/run-4/checkpoint-3368/sentencepiece.bpe.model b/run-4/checkpoint-3368/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-4/checkpoint-3368/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-4/checkpoint-3368/special_tokens_map.json b/run-4/checkpoint-3368/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-4/checkpoint-3368/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-4/checkpoint-3368/tokenizer.json b/run-4/checkpoint-3368/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-4/checkpoint-3368/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-4/checkpoint-3368/tokenizer_config.json b/run-4/checkpoint-3368/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-4/checkpoint-3368/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-4/checkpoint-3368/trainer_state.json b/run-4/checkpoint-3368/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..b13f435db7876065628d9530c155e55c67fcd8ac
--- /dev/null
+++ b/run-4/checkpoint-3368/trainer_state.json
@@ -0,0 +1,86 @@
+{
+ "best_metric": 0.8279816513761468,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-4/checkpoint-3368",
+ "epoch": 2.0,
+ "eval_steps": 500,
+ "global_step": 3368,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.3,
+ "grad_norm": 207.84652709960938,
+ "learning_rate": 1.6360069774388828e-05,
+ "loss": 0.5581,
+ "step": 500
+ },
+ {
+ "epoch": 0.59,
+ "grad_norm": 1.0724189281463623,
+ "learning_rate": 1.4563049807957717e-05,
+ "loss": 0.5489,
+ "step": 1000
+ },
+ {
+ "epoch": 0.89,
+ "grad_norm": 2.0987462997436523,
+ "learning_rate": 1.2766029841526607e-05,
+ "loss": 0.5061,
+ "step": 1500
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.823394495412844,
+ "eval_loss": 0.4638556241989136,
+ "eval_runtime": 1.0938,
+ "eval_samples_per_second": 797.217,
+ "eval_steps_per_second": 50.283,
+ "step": 1684
+ },
+ {
+ "epoch": 1.19,
+ "grad_norm": 2.370238780975342,
+ "learning_rate": 1.0969009875095499e-05,
+ "loss": 0.458,
+ "step": 2000
+ },
+ {
+ "epoch": 1.48,
+ "grad_norm": 25.474912643432617,
+ "learning_rate": 9.171989908664387e-06,
+ "loss": 0.4448,
+ "step": 2500
+ },
+ {
+ "epoch": 1.78,
+ "grad_norm": 2.603041887283325,
+ "learning_rate": 7.374969942233276e-06,
+ "loss": 0.4212,
+ "step": 3000
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.8279816513761468,
+ "eval_loss": 0.5386698842048645,
+ "eval_runtime": 1.024,
+ "eval_samples_per_second": 851.56,
+ "eval_steps_per_second": 53.711,
+ "step": 3368
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 5052,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 500,
+ "total_flos": 43392374384520.0,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.8157089740819938e-05,
+ "num_train_epochs": 3,
+ "per_device_train_batch_size": 4,
+ "seed": 35
+ }
+}
diff --git a/run-4/checkpoint-3368/training_args.bin b/run-4/checkpoint-3368/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8e48306f0db4daa511e23a8d280034879985e67
--- /dev/null
+++ b/run-4/checkpoint-3368/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f6a6f9969049103e5698f1db819dd1df7a7e024a2e35a83b61a6fb789d6e160
+size 4984
diff --git a/run-4/checkpoint-5052/config.json b/run-4/checkpoint-5052/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..9d567d45f62ff0518929cc5635887991007fcfb7
--- /dev/null
+++ b/run-4/checkpoint-5052/config.json
@@ -0,0 +1,27 @@
+{
+ "_name_or_path": "microsoft/Multilingual-MiniLM-L12-H384",
+ "architectures": [
+ "BertForSequenceClassification"
+ ],
+ "attention_probs_dropout_prob": 0.1,
+ "classifier_dropout": null,
+ "hidden_act": "gelu",
+ "hidden_dropout_prob": 0.1,
+ "hidden_size": 384,
+ "initializer_range": 0.02,
+ "intermediate_size": 1536,
+ "layer_norm_eps": 1e-12,
+ "max_position_embeddings": 512,
+ "model_type": "bert",
+ "num_attention_heads": 12,
+ "num_hidden_layers": 12,
+ "pad_token_id": 0,
+ "position_embedding_type": "absolute",
+ "problem_type": "single_label_classification",
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "torch_dtype": "float32",
+ "transformers_version": "4.38.2",
+ "type_vocab_size": 2,
+ "use_cache": true,
+ "vocab_size": 250037
+}
diff --git a/run-4/checkpoint-5052/model.safetensors b/run-4/checkpoint-5052/model.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0213d4c88d411523a6dc3f79b97b864d86e09a77
--- /dev/null
+++ b/run-4/checkpoint-5052/model.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bc896c70d6094782d316ed4ff72f355a62338eb7a17610edb8898dde5fe8c59
+size 470641664
diff --git a/run-4/checkpoint-5052/optimizer.pt b/run-4/checkpoint-5052/optimizer.pt
new file mode 100644
index 0000000000000000000000000000000000000000..611cbb5bd8a6f3eadb7fa69156d740b98292946d
--- /dev/null
+++ b/run-4/checkpoint-5052/optimizer.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6610967f4d9f60d2f8e29b3436f40329c89eb7448c48f2a4427bf1d5d853ca9e
+size 941404410
diff --git a/run-4/checkpoint-5052/rng_state.pth b/run-4/checkpoint-5052/rng_state.pth
new file mode 100644
index 0000000000000000000000000000000000000000..af482c0b5af19a576eca4d68bd5bedd69a2082ca
--- /dev/null
+++ b/run-4/checkpoint-5052/rng_state.pth
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45d43ebbd5c6e7468e158eea64220583e151933a83bc7c5839c0b2f74756eb6a
+size 14244
diff --git a/run-4/checkpoint-5052/scheduler.pt b/run-4/checkpoint-5052/scheduler.pt
new file mode 100644
index 0000000000000000000000000000000000000000..584ce4b843c683c2bdd7a2bea80dacc5895c6ef6
--- /dev/null
+++ b/run-4/checkpoint-5052/scheduler.pt
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e7d15262bf90300804a3bb308d82d1264af762cf780cf2b47dbd486308f9d12b
+size 1064
diff --git a/run-4/checkpoint-5052/sentencepiece.bpe.model b/run-4/checkpoint-5052/sentencepiece.bpe.model
new file mode 100644
index 0000000000000000000000000000000000000000..7a3f40a75f870bc1f21700cd414dc2acc431583c
--- /dev/null
+++ b/run-4/checkpoint-5052/sentencepiece.bpe.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfc8146abe2a0488e9e2a0c56de7952f7c11ab059eca145a0a727afce0db2865
+size 5069051
diff --git a/run-4/checkpoint-5052/special_tokens_map.json b/run-4/checkpoint-5052/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d5698132694f4f1bcff08fa7d937b1701812598e
--- /dev/null
+++ b/run-4/checkpoint-5052/special_tokens_map.json
@@ -0,0 +1,15 @@
+{
+ "bos_token": "",
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": "",
+ "sep_token": "",
+ "unk_token": ""
+}
diff --git a/run-4/checkpoint-5052/tokenizer.json b/run-4/checkpoint-5052/tokenizer.json
new file mode 100644
index 0000000000000000000000000000000000000000..1c98a28b19c084901b2622d8e8b15e85eed13bff
--- /dev/null
+++ b/run-4/checkpoint-5052/tokenizer.json
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:69564b696052886ed0ac63fa393e928384e0f8caada38c1f4864a9bfbf379c15
+size 17098273
diff --git a/run-4/checkpoint-5052/tokenizer_config.json b/run-4/checkpoint-5052/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..30eb6a901d427cdc9c43054f7c72a246771a8453
--- /dev/null
+++ b/run-4/checkpoint-5052/tokenizer_config.json
@@ -0,0 +1,55 @@
+{
+ "added_tokens_decoder": {
+ "0": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "3": {
+ "content": "",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "250001": {
+ "content": "",
+ "lstrip": true,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "",
+ "clean_up_tokenization_spaces": true,
+ "cls_token": "",
+ "eos_token": "",
+ "mask_token": "",
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "",
+ "sep_token": "",
+ "sp_model_kwargs": {},
+ "tokenizer_class": "XLMRobertaTokenizer",
+ "unk_token": ""
+}
diff --git a/run-4/checkpoint-5052/trainer_state.json b/run-4/checkpoint-5052/trainer_state.json
new file mode 100644
index 0000000000000000000000000000000000000000..878c12ee77b0ff791d4f2355c0b206a0cbedccf4
--- /dev/null
+++ b/run-4/checkpoint-5052/trainer_state.json
@@ -0,0 +1,123 @@
+{
+ "best_metric": 0.8371559633027523,
+ "best_model_checkpoint": "microsoft/Multilingual-MiniLM-L12-H384-finetuned-sst2/run-4/checkpoint-5052",
+ "epoch": 3.0,
+ "eval_steps": 500,
+ "global_step": 5052,
+ "is_hyper_param_search": true,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.3,
+ "grad_norm": 207.84652709960938,
+ "learning_rate": 1.6360069774388828e-05,
+ "loss": 0.5581,
+ "step": 500
+ },
+ {
+ "epoch": 0.59,
+ "grad_norm": 1.0724189281463623,
+ "learning_rate": 1.4563049807957717e-05,
+ "loss": 0.5489,
+ "step": 1000
+ },
+ {
+ "epoch": 0.89,
+ "grad_norm": 2.0987462997436523,
+ "learning_rate": 1.2766029841526607e-05,
+ "loss": 0.5061,
+ "step": 1500
+ },
+ {
+ "epoch": 1.0,
+ "eval_accuracy": 0.823394495412844,
+ "eval_loss": 0.4638556241989136,
+ "eval_runtime": 1.0938,
+ "eval_samples_per_second": 797.217,
+ "eval_steps_per_second": 50.283,
+ "step": 1684
+ },
+ {
+ "epoch": 1.19,
+ "grad_norm": 2.370238780975342,
+ "learning_rate": 1.0969009875095499e-05,
+ "loss": 0.458,
+ "step": 2000
+ },
+ {
+ "epoch": 1.48,
+ "grad_norm": 25.474912643432617,
+ "learning_rate": 9.171989908664387e-06,
+ "loss": 0.4448,
+ "step": 2500
+ },
+ {
+ "epoch": 1.78,
+ "grad_norm": 2.603041887283325,
+ "learning_rate": 7.374969942233276e-06,
+ "loss": 0.4212,
+ "step": 3000
+ },
+ {
+ "epoch": 2.0,
+ "eval_accuracy": 0.8279816513761468,
+ "eval_loss": 0.5386698842048645,
+ "eval_runtime": 1.024,
+ "eval_samples_per_second": 851.56,
+ "eval_steps_per_second": 53.711,
+ "step": 3368
+ },
+ {
+ "epoch": 2.08,
+ "grad_norm": 3.378201484680176,
+ "learning_rate": 5.5779499758021666e-06,
+ "loss": 0.4011,
+ "step": 3500
+ },
+ {
+ "epoch": 2.38,
+ "grad_norm": 0.8383808135986328,
+ "learning_rate": 3.780930009371056e-06,
+ "loss": 0.3478,
+ "step": 4000
+ },
+ {
+ "epoch": 2.67,
+ "grad_norm": 23.079736709594727,
+ "learning_rate": 1.9839100429399457e-06,
+ "loss": 0.3534,
+ "step": 4500
+ },
+ {
+ "epoch": 2.97,
+ "grad_norm": 0.36013078689575195,
+ "learning_rate": 1.8689007650883547e-07,
+ "loss": 0.3323,
+ "step": 5000
+ },
+ {
+ "epoch": 3.0,
+ "eval_accuracy": 0.8371559633027523,
+ "eval_loss": 0.5858107805252075,
+ "eval_runtime": 0.9203,
+ "eval_samples_per_second": 947.555,
+ "eval_steps_per_second": 59.765,
+ "step": 5052
+ }
+ ],
+ "logging_steps": 500,
+ "max_steps": 5052,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 500,
+ "total_flos": 72166406306568.0,
+ "train_batch_size": 4,
+ "trial_name": null,
+ "trial_params": {
+ "learning_rate": 1.8157089740819938e-05,
+ "num_train_epochs": 3,
+ "per_device_train_batch_size": 4,
+ "seed": 35
+ }
+}
diff --git a/run-4/checkpoint-5052/training_args.bin b/run-4/checkpoint-5052/training_args.bin
new file mode 100644
index 0000000000000000000000000000000000000000..a8e48306f0db4daa511e23a8d280034879985e67
--- /dev/null
+++ b/run-4/checkpoint-5052/training_args.bin
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f6a6f9969049103e5698f1db819dd1df7a7e024a2e35a83b61a6fb789d6e160
+size 4984
diff --git a/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709639319.0db4763e2117.1712.6 b/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709639319.0db4763e2117.1712.6
new file mode 100644
index 0000000000000000000000000000000000000000..b82dd04d770c5c032c3af4f677bb797b7b016c9a
--- /dev/null
+++ b/runs/Mar05_10-49-50_0db4763e2117/events.out.tfevents.1709639319.0db4763e2117.1712.6
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdca43a7dc403770bff90d5950037a32cf18bf1763fcce826b9ae8fd38b7b16c
+size 8103
diff --git a/training_args.bin b/training_args.bin
index 47bcbba551c4a22f3fcb86da051ef0c061c72bee..a8e48306f0db4daa511e23a8d280034879985e67 100644
--- a/training_args.bin
+++ b/training_args.bin
@@ -1,3 +1,3 @@
version https://git-lfs.github.com/spec/v1
-oid sha256:ae913b9d9917311801715473b973aed6b7064f0996bb6f6f0e3ab9ab897efb3e
+oid sha256:1f6a6f9969049103e5698f1db819dd1df7a7e024a2e35a83b61a6fb789d6e160
size 4984