Slava committed on Jan 25, 2024

Commit

42b93ed

verified ·

1 Parent(s): f1bd202

Training in progress, epoch 1

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

logs/events.out.tfevents.1706181411.A-FVFFG2C4Q05P.18450.3 +2 -2
logs/events.out.tfevents.1706189038.A-FVFFG2C4Q05P.18450.4 +3 -0
logs/events.out.tfevents.1706196200.A-FVFFG2C4Q05P.18450.5 +3 -0
model.safetensors +1 -1
run-2/checkpoint-2108/config.json +34 -0
run-2/checkpoint-2108/model.safetensors +3 -0
run-2/checkpoint-2108/optimizer.pt +3 -0
run-2/checkpoint-2108/rng_state.pth +3 -0
run-2/checkpoint-2108/scheduler.pt +3 -0
run-2/checkpoint-2108/special_tokens_map.json +7 -0
run-2/checkpoint-2108/tokenizer.json +0 -0
run-2/checkpoint-2108/tokenizer_config.json +57 -0
run-2/checkpoint-2108/trainer_state.json +86 -0
run-2/checkpoint-2108/training_args.bin +3 -0
run-2/checkpoint-2108/vocab.txt +0 -0
run-2/checkpoint-3689/config.json +34 -0
run-2/checkpoint-3689/model.safetensors +3 -0
run-2/checkpoint-3689/optimizer.pt +3 -0
run-2/checkpoint-3689/rng_state.pth +3 -0
run-2/checkpoint-3689/scheduler.pt +3 -0
run-2/checkpoint-3689/special_tokens_map.json +7 -0
run-2/checkpoint-3689/tokenizer.json +0 -0
run-2/checkpoint-3689/tokenizer_config.json +57 -0
run-2/checkpoint-3689/trainer_state.json +131 -0
run-2/checkpoint-3689/training_args.bin +3 -0
run-2/checkpoint-3689/vocab.txt +0 -0
run-3/checkpoint-2635/config.json +34 -0
run-3/checkpoint-2635/model.safetensors +3 -0
run-3/checkpoint-2635/optimizer.pt +3 -0
run-3/checkpoint-2635/rng_state.pth +3 -0
run-3/checkpoint-2635/scheduler.pt +3 -0
run-3/checkpoint-2635/special_tokens_map.json +7 -0
run-3/checkpoint-2635/tokenizer.json +0 -0
run-3/checkpoint-2635/tokenizer_config.json +57 -0
run-3/checkpoint-2635/trainer_state.json +101 -0
run-3/checkpoint-2635/training_args.bin +3 -0
run-3/checkpoint-2635/vocab.txt +0 -0
run-3/checkpoint-527/config.json +34 -0
run-3/checkpoint-527/model.safetensors +3 -0
run-3/checkpoint-527/optimizer.pt +3 -0
run-3/checkpoint-527/rng_state.pth +3 -0
run-3/checkpoint-527/scheduler.pt +3 -0
run-3/checkpoint-527/special_tokens_map.json +7 -0
run-3/checkpoint-527/tokenizer.json +0 -0
run-3/checkpoint-527/tokenizer_config.json +57 -0
run-3/checkpoint-527/trainer_state.json +41 -0
run-3/checkpoint-527/training_args.bin +3 -0
run-3/checkpoint-527/vocab.txt +0 -0
run-4/checkpoint-527/config.json +34 -0
run-4/checkpoint-527/model.safetensors +3 -0

logs/events.out.tfevents.1706181411.A-FVFFG2C4Q05P.18450.3 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1a846a378b4ba152a38f2ee7d514086d755d7154e548a2ace0cac942fa771be9
-size 5396

 version https://git-lfs.github.com/spec/v1
+oid sha256:ba53916c1de06b8b9e3bf5372f7df77dce88d1e1ca976d5a723e5138bb08ace8
+size 8150

logs/events.out.tfevents.1706189038.A-FVFFG2C4Q05P.18450.4 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed792ecc848af69afb8ff6164744104da56d6f72fb5446eefe50e390800ece89
+size 7190

logs/events.out.tfevents.1706196200.A-FVFFG2C4Q05P.18450.5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:53b33c54d05ee0e72b3ad04a32dc40448bf1c1c65597a97ff3edeb33a16ee5c9
+size 4915

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:395d28d0adf7ece98dfdd310bc63f02d8bcd77ca2a202581e35924db72b1b4d3
 size 17549312

 version https://git-lfs.github.com/spec/v1
+oid sha256:deb8aa05f92c3e3e3100697f91476432307ef227d91e417e359f633516f53ade
 size 17549312

run-2/checkpoint-2108/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.37.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-2/checkpoint-2108/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:059957aafba48a0874192cdd9adfa8134d4b6e1d4b526ccdc3daf3e95d060417
+size 17549312

run-2/checkpoint-2108/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:800621d9b455b609e2a1673e9ef3bda30026ba964354572696b863407bed623f
+size 35122373

run-2/checkpoint-2108/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2aae3bffb74208d13dfae4d8b66eb00427742bd84752ee081ae0f135d90f179b
+size 13617

run-2/checkpoint-2108/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:46f542e15b9cbe13d4728c333b67a5c6dac10a3d94a471014056774cbe2dee00
+size 627

run-2/checkpoint-2108/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-2108/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-2108/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-2108/trainer_state.json ADDED Viewed

	@@ -0,0 +1,86 @@

+{
+  "best_metric": 0.8325688073394495,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-2108",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 2108,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 0.00013462097079767568,
+      "loss": 1.19,
+      "step": 527
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.819954128440367,
+      "eval_loss": 1.0689420700073242,
+      "eval_runtime": 10.3665,
+      "eval_samples_per_second": 84.117,
+      "eval_steps_per_second": 0.675,
+      "step": 527
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.00011218414233139642,
+      "loss": 0.5832,
+      "step": 1054
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.8279816513761468,
+      "eval_loss": 1.1704012155532837,
+      "eval_runtime": 6.2773,
+      "eval_samples_per_second": 138.912,
+      "eval_steps_per_second": 1.115,
+      "step": 1054
+    },
+    {
+      "epoch": 3.0,
+      "learning_rate": 8.974731386511713e-05,
+      "loss": 0.4391,
+      "step": 1581
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.823394495412844,
+      "eval_loss": 1.1875587701797485,
+      "eval_runtime": 6.2138,
+      "eval_samples_per_second": 140.333,
+      "eval_steps_per_second": 1.127,
+      "step": 1581
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 6.731048539883784e-05,
+      "loss": 0.3642,
+      "step": 2108
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.8325688073394495,
+      "eval_loss": 1.1609015464782715,
+      "eval_runtime": 8.1221,
+      "eval_samples_per_second": 107.361,
+      "eval_steps_per_second": 0.862,
+      "step": 2108
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 3689,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 7,
+  "save_steps": 500,
+  "total_flos": 32344049138640.0,
+  "train_batch_size": 128,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.6204520709084631,
+    "learning_rate": 0.00015705779926395499,
+    "num_train_epochs": 7,
+    "temperature": 16
+  }
+}

run-2/checkpoint-2108/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b4be0222258df26817e91fe31755ef0f0e5b0b1423f1be49de3cd924d2cf509
+size 4283

run-2/checkpoint-2108/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-3689/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.37.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-2/checkpoint-3689/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b6343018918e71362934dc01166336ef78fabf6dc5eb527c32df011bc4923cff
+size 17549312

run-2/checkpoint-3689/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:73aa6ea179fb648e4fbd8572af5ac4894a38924917ea67eecfcb26ccade89514
+size 35122373

run-2/checkpoint-3689/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1944a0f816099340e1dbf1a93e033a5a3a4bb0c92f096d7559d0edb8afc135de
+size 13617

run-2/checkpoint-3689/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f1a2d7f6656eeeaa378960e23366ac79360db78a37b3d208225a1a1fbe644fa
+size 627

run-2/checkpoint-3689/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-3689/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-3689/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-3689/trainer_state.json ADDED Viewed

	@@ -0,0 +1,131 @@

+{
+  "best_metric": 0.8325688073394495,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-2/checkpoint-2108",
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 3689,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 0.00013462097079767568,
+      "loss": 1.19,
+      "step": 527
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.819954128440367,
+      "eval_loss": 1.0689420700073242,
+      "eval_runtime": 10.3665,
+      "eval_samples_per_second": 84.117,
+      "eval_steps_per_second": 0.675,
+      "step": 527
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.00011218414233139642,
+      "loss": 0.5832,
+      "step": 1054
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.8279816513761468,
+      "eval_loss": 1.1704012155532837,
+      "eval_runtime": 6.2773,
+      "eval_samples_per_second": 138.912,
+      "eval_steps_per_second": 1.115,
+      "step": 1054
+    },
+    {
+      "epoch": 3.0,
+      "learning_rate": 8.974731386511713e-05,
+      "loss": 0.4391,
+      "step": 1581
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.823394495412844,
+      "eval_loss": 1.1875587701797485,
+      "eval_runtime": 6.2138,
+      "eval_samples_per_second": 140.333,
+      "eval_steps_per_second": 1.127,
+      "step": 1581
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 6.731048539883784e-05,
+      "loss": 0.3642,
+      "step": 2108
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.8325688073394495,
+      "eval_loss": 1.1609015464782715,
+      "eval_runtime": 8.1221,
+      "eval_samples_per_second": 107.361,
+      "eval_steps_per_second": 0.862,
+      "step": 2108
+    },
+    {
+      "epoch": 5.0,
+      "learning_rate": 4.4873656932558565e-05,
+      "loss": 0.3145,
+      "step": 2635
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.823394495412844,
+      "eval_loss": 1.2187858819961548,
+      "eval_runtime": 8.576,
+      "eval_samples_per_second": 101.679,
+      "eval_steps_per_second": 0.816,
+      "step": 2635
+    },
+    {
+      "epoch": 6.0,
+      "learning_rate": 2.2436828466279283e-05,
+      "loss": 0.2816,
+      "step": 3162
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.8268348623853211,
+      "eval_loss": 1.215812087059021,
+      "eval_runtime": 9.7324,
+      "eval_samples_per_second": 89.598,
+      "eval_steps_per_second": 0.719,
+      "step": 3162
+    },
+    {
+      "epoch": 7.0,
+      "learning_rate": 0.0,
+      "loss": 0.2599,
+      "step": 3689
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.8256880733944955,
+      "eval_loss": 1.249680519104004,
+      "eval_runtime": 9.3708,
+      "eval_samples_per_second": 93.055,
+      "eval_steps_per_second": 0.747,
+      "step": 3689
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 3689,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 7,
+  "save_steps": 500,
+  "total_flos": 56642674579560.0,
+  "train_batch_size": 128,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.6204520709084631,
+    "learning_rate": 0.00015705779926395499,
+    "num_train_epochs": 7,
+    "temperature": 16
+  }
+}

run-2/checkpoint-3689/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b4be0222258df26817e91fe31755ef0f0e5b0b1423f1be49de3cd924d2cf509
+size 4283

run-2/checkpoint-3689/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-2635/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.37.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-3/checkpoint-2635/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f06a2e462faf95a7a6dfe7b2df811a907b6b9c5ea691a5d849433067ca082183
+size 17549312

run-3/checkpoint-2635/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8dc7d7662f62738e0ccd547c15022f154f503da4201760caabfa5920412fade0
+size 35122373

run-3/checkpoint-2635/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb154c82007a8758ab9f9ae31d1389cae06c1f2182ea0779f330182347f52f96
+size 13617

run-3/checkpoint-2635/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9f8e46e076b308dd2c6d09606b687c832bccc97292bc86eb495c55b072d3f5aa
+size 627

run-3/checkpoint-2635/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-2635/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-2635/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-2635/trainer_state.json ADDED Viewed

	@@ -0,0 +1,101 @@

+{
+  "best_metric": 0.8256880733944955,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-527",
+  "epoch": 5.0,
+  "eval_steps": 500,
+  "global_step": 2635,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 0.0002646598360783595,
+      "loss": 0.4864,
+      "step": 527
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.8256880733944955,
+      "eval_loss": 0.5460890531539917,
+      "eval_runtime": 6.1659,
+      "eval_samples_per_second": 141.424,
+      "eval_steps_per_second": 1.135,
+      "step": 527
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 0.00019849487705876962,
+      "loss": 0.2623,
+      "step": 1054
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.8188073394495413,
+      "eval_loss": 0.7069737315177917,
+      "eval_runtime": 7.2395,
+      "eval_samples_per_second": 120.451,
+      "eval_steps_per_second": 0.967,
+      "step": 1054
+    },
+    {
+      "epoch": 3.0,
+      "learning_rate": 0.00013232991803917974,
+      "loss": 0.1902,
+      "step": 1581
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.823394495412844,
+      "eval_loss": 0.7423160076141357,
+      "eval_runtime": 7.2214,
+      "eval_samples_per_second": 120.753,
+      "eval_steps_per_second": 0.969,
+      "step": 1581
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 6.616495901958987e-05,
+      "loss": 0.1515,
+      "step": 2108
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.819954128440367,
+      "eval_loss": 0.7632457613945007,
+      "eval_runtime": 7.0986,
+      "eval_samples_per_second": 122.841,
+      "eval_steps_per_second": 0.986,
+      "step": 2108
+    },
+    {
+      "epoch": 5.0,
+      "learning_rate": 0.0,
+      "loss": 0.1242,
+      "step": 2635
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.8222477064220184,
+      "eval_loss": 0.8318789601325989,
+      "eval_runtime": 7.1337,
+      "eval_samples_per_second": 122.236,
+      "eval_steps_per_second": 0.981,
+      "step": 2635
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2635,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 40436714110320.0,
+  "train_batch_size": 128,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.9299267464979704,
+    "learning_rate": 0.00033082479509794936,
+    "num_train_epochs": 5,
+    "temperature": 11
+  }
+}

run-3/checkpoint-2635/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db4a3bf2450a26eff9bfb630b2c96a3526f6ad8689cd760892210e4dc034be42
+size 4283

run-3/checkpoint-2635/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-527/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.37.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-3/checkpoint-527/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fea5a3a721533db0cf8666635e2ae9961df8bbe1b0aba043f68650fcd34fb6af
+size 17549312

run-3/checkpoint-527/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:007778dbd50ca906a55463f7fb2ea0b2484e05afc4f460fdf01931038f920f55
+size 35122373

run-3/checkpoint-527/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f943a8d8f93fea7f44cad3b9bfb2d2b97d65b141eb704b9cba669cf42e776b14
+size 13617

run-3/checkpoint-527/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d000fd306f2a2387119376b2a7ce6a310cafb331ef764c119e50e54e29df6617
+size 627

run-3/checkpoint-527/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-527/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-3/checkpoint-527/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-3/checkpoint-527/trainer_state.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "best_metric": 0.8256880733944955,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-3/checkpoint-527",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 527,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 0.0002646598360783595,
+      "loss": 0.4864,
+      "step": 527
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.8256880733944955,
+      "eval_loss": 0.5460890531539917,
+      "eval_runtime": 6.1659,
+      "eval_samples_per_second": 141.424,
+      "eval_steps_per_second": 1.135,
+      "step": 527
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 2635,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 8069051778960.0,
+  "train_batch_size": 128,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.9299267464979704,
+    "learning_rate": 0.00033082479509794936,
+    "num_train_epochs": 5,
+    "temperature": 11
+  }
+}

run-3/checkpoint-527/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:db4a3bf2450a26eff9bfb630b2c96a3526f6ad8689cd760892210e4dc034be42
+size 4283

run-3/checkpoint-527/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-4/checkpoint-527/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.37.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-4/checkpoint-527/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:deb8aa05f92c3e3e3100697f91476432307ef227d91e417e359f633516f53ade
+size 17549312