Slava committed on Jan 26, 2024

Commit

e99107c

verified ·

1 Parent(s): 7f14c98

Training in progress, epoch 2

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

logs/events.out.tfevents.1706213692.A-FVFFG2C4Q05P.18450.12 +2 -2
logs/events.out.tfevents.1706219148.A-FVFFG2C4Q05P.18450.13 +3 -0
logs/events.out.tfevents.1706220282.A-FVFFG2C4Q05P.18450.14 +3 -0
logs/events.out.tfevents.1706225688.A-FVFFG2C4Q05P.18450.15 +3 -0
logs/events.out.tfevents.1706235148.A-FVFFG2C4Q05P.18450.16 +3 -0
logs/events.out.tfevents.1706238477.A-FVFFG2C4Q05P.18450.17 +3 -0
logs/events.out.tfevents.1706248274.A-FVFFG2C4Q05P.18450.18 +3 -0
logs/events.out.tfevents.1706249191.A-FVFFG2C4Q05P.18450.19 +3 -0
model.safetensors +1 -1
run-11/checkpoint-1054/config.json +34 -0
run-11/checkpoint-1054/model.safetensors +3 -0
run-11/checkpoint-1054/optimizer.pt +3 -0
run-11/checkpoint-1054/rng_state.pth +3 -0
run-11/checkpoint-1054/scheduler.pt +3 -0
run-11/checkpoint-1054/special_tokens_map.json +7 -0
run-11/checkpoint-1054/tokenizer.json +0 -0
run-11/checkpoint-1054/tokenizer_config.json +57 -0
run-11/checkpoint-1054/trainer_state.json +56 -0
run-11/checkpoint-1054/training_args.bin +3 -0
run-11/checkpoint-1054/vocab.txt +0 -0
run-11/checkpoint-3689/config.json +34 -0
run-11/checkpoint-3689/model.safetensors +3 -0
run-11/checkpoint-3689/optimizer.pt +3 -0
run-11/checkpoint-3689/rng_state.pth +3 -0
run-11/checkpoint-3689/scheduler.pt +3 -0
run-11/checkpoint-3689/special_tokens_map.json +7 -0
run-11/checkpoint-3689/tokenizer.json +0 -0
run-11/checkpoint-3689/tokenizer_config.json +57 -0
run-11/checkpoint-3689/trainer_state.json +131 -0
run-11/checkpoint-3689/training_args.bin +3 -0
run-11/checkpoint-3689/vocab.txt +0 -0
run-14/checkpoint-527/config.json +34 -0
run-14/checkpoint-527/model.safetensors +3 -0
run-14/checkpoint-527/optimizer.pt +3 -0
run-14/checkpoint-527/rng_state.pth +3 -0
run-14/checkpoint-527/scheduler.pt +3 -0
run-14/checkpoint-527/special_tokens_map.json +7 -0
run-14/checkpoint-527/tokenizer.json +0 -0
run-14/checkpoint-527/tokenizer_config.json +57 -0
run-14/checkpoint-527/trainer_state.json +41 -0
run-14/checkpoint-527/training_args.bin +3 -0
run-14/checkpoint-527/vocab.txt +0 -0
run-16/checkpoint-1581/config.json +34 -0
run-16/checkpoint-1581/model.safetensors +3 -0
run-16/checkpoint-1581/optimizer.pt +3 -0
run-16/checkpoint-1581/rng_state.pth +3 -0
run-16/checkpoint-1581/scheduler.pt +3 -0
run-16/checkpoint-1581/special_tokens_map.json +7 -0
run-16/checkpoint-1581/tokenizer.json +0 -0
run-16/checkpoint-1581/tokenizer_config.json +57 -0

logs/events.out.tfevents.1706213692.A-FVFFG2C4Q05P.18450.12 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:62cc5bae5643e76f5fe7132152762e60f7c56f8a1f4dd8974ac1be3b8f3804b9
-size 4915
+oid sha256:abb62a9dd9f6430bc1eda0081efeb8aadd7c1c72a5da4ee45f1be86337d023dc
+size 8149

logs/events.out.tfevents.1706219148.A-FVFFG2C4Q05P.18450.13 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7ffaf393c526a120288d3f5148beef1cc5b65198d4f7bce83170e04725b8816f
+size 4916

logs/events.out.tfevents.1706220282.A-FVFFG2C4Q05P.18450.14 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:afb280db1879dc86e3d917f2544ce588ef981bcfbd8a4c39827399c9872e95dd
+size 4915

logs/events.out.tfevents.1706225688.A-FVFFG2C4Q05P.18450.15 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14304845751cc508cd7826cfe49a1345788c5dfbb13f65869d8c976eea306d14
+size 5396

logs/events.out.tfevents.1706235148.A-FVFFG2C4Q05P.18450.16 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f50b65e51e0946b0e37e5f4f8cdcf2479100ce4dc9664a3d81723a1b52fc78f1
+size 4916

logs/events.out.tfevents.1706238477.A-FVFFG2C4Q05P.18450.17 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:248b91f61c18e4c8821676ed6fba1d03969f36ac6b8813731ca22ed8e32be6bb
+size 6356

logs/events.out.tfevents.1706248274.A-FVFFG2C4Q05P.18450.18 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:561be7125c6f8eab9f747bb3fe5cac3461393ae59dd2ce00308980f8966a0196
+size 4915

logs/events.out.tfevents.1706249191.A-FVFFG2C4Q05P.18450.19 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8e9193e605eef0fb5bd220ade658b78f062ba5fc8ea0e20ca53918eb38d1998
+size 5396

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:23893f7224a7bc69c792d25073f9d6a6b006ae31c0a78b378e8e817d49d4fafc
+oid sha256:8f7361cc641172e4d433d9122fa85e9b1eda716c64127b2b928ad93b520ec046
 size 17549312

run-11/checkpoint-1054/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.37.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-11/checkpoint-1054/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cc6a486e060246d3adb48bcaf435087c55b67c7ad776bb37ee9dac07cfc37448
+size 17549312

run-11/checkpoint-1054/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:feb1d68212ad1095e55c03b80d1c02b1997fc76709ed40a67ed563aba54ea8b3
+size 35122373

run-11/checkpoint-1054/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:260bdbb59b5e150f75450cd4e9defbf6deeda9a4e5690f5284bcc9395a60f0af
+size 13617

run-11/checkpoint-1054/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d0de76751247d87b58d90a1b730f62a7a21a6ee787542ae9cdac66f463930db
+size 627

run-11/checkpoint-1054/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-11/checkpoint-1054/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-11/checkpoint-1054/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-11/checkpoint-1054/trainer_state.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "best_metric": 0.8337155963302753,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-11/checkpoint-1054",
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 1054,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 0.00010911592510991879,
+      "loss": 1.2248,
+      "step": 527
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.8130733944954128,
+      "eval_loss": 1.0815833806991577,
+      "eval_runtime": 19.1179,
+      "eval_samples_per_second": 45.612,
+      "eval_steps_per_second": 0.366,
+      "step": 527
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 9.0929937591599e-05,
+      "loss": 0.6152,
+      "step": 1054
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.8337155963302753,
+      "eval_loss": 1.116389513015747,
+      "eval_runtime": 6.3065,
+      "eval_samples_per_second": 138.271,
+      "eval_steps_per_second": 1.11,
+      "step": 1054
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 3689,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 7,
+  "save_steps": 500,
+  "total_flos": 16162868129520.0,
+  "train_batch_size": 128,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.6271968482895725,
+    "learning_rate": 0.0001273019126282386,
+    "num_train_epochs": 7,
+    "temperature": 11
+  }
+}

run-11/checkpoint-1054/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff57408053b19dbf32dbe4dd4af251a79a2c99bea6a376004e876c375e910b86
+size 4283

run-11/checkpoint-1054/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-11/checkpoint-3689/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.37.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-11/checkpoint-3689/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8693a63cbf06ce0d856ba6668fec281315b4d53102598dabade6f27926391b5
+size 17549312

run-11/checkpoint-3689/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0721eecf6bc93448597411fc82e51cac8198955d973c0e489f03a0155c35d5b1
+size 35122373

run-11/checkpoint-3689/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1944a0f816099340e1dbf1a93e033a5a3a4bb0c92f096d7559d0edb8afc135de
+size 13617

run-11/checkpoint-3689/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:47b32bcfe0ed82d05530a69569db3776f28ee7f8cd9eab70ce1f1cb8da68b200
+size 627

run-11/checkpoint-3689/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-11/checkpoint-3689/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-11/checkpoint-3689/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-11/checkpoint-3689/trainer_state.json ADDED Viewed

	@@ -0,0 +1,131 @@

+{
+  "best_metric": 0.8337155963302753,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-11/checkpoint-1054",
+  "epoch": 7.0,
+  "eval_steps": 500,
+  "global_step": 3689,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 0.00010911592510991879,
+      "loss": 1.2248,
+      "step": 527
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.8130733944954128,
+      "eval_loss": 1.0815833806991577,
+      "eval_runtime": 19.1179,
+      "eval_samples_per_second": 45.612,
+      "eval_steps_per_second": 0.366,
+      "step": 527
+    },
+    {
+      "epoch": 2.0,
+      "learning_rate": 9.0929937591599e-05,
+      "loss": 0.6152,
+      "step": 1054
+    },
+    {
+      "epoch": 2.0,
+      "eval_accuracy": 0.8337155963302753,
+      "eval_loss": 1.116389513015747,
+      "eval_runtime": 6.3065,
+      "eval_samples_per_second": 138.271,
+      "eval_steps_per_second": 1.11,
+      "step": 1054
+    },
+    {
+      "epoch": 3.0,
+      "learning_rate": 7.27439500732792e-05,
+      "loss": 0.471,
+      "step": 1581
+    },
+    {
+      "epoch": 3.0,
+      "eval_accuracy": 0.823394495412844,
+      "eval_loss": 1.144020438194275,
+      "eval_runtime": 6.8066,
+      "eval_samples_per_second": 128.11,
+      "eval_steps_per_second": 1.028,
+      "step": 1581
+    },
+    {
+      "epoch": 4.0,
+      "learning_rate": 5.4557962554959397e-05,
+      "loss": 0.3957,
+      "step": 2108
+    },
+    {
+      "epoch": 4.0,
+      "eval_accuracy": 0.8325688073394495,
+      "eval_loss": 1.1239169836044312,
+      "eval_runtime": 7.0266,
+      "eval_samples_per_second": 124.099,
+      "eval_steps_per_second": 0.996,
+      "step": 2108
+    },
+    {
+      "epoch": 5.0,
+      "learning_rate": 3.63719750366396e-05,
+      "loss": 0.3467,
+      "step": 2635
+    },
+    {
+      "epoch": 5.0,
+      "eval_accuracy": 0.8256880733944955,
+      "eval_loss": 1.1871780157089233,
+      "eval_runtime": 6.99,
+      "eval_samples_per_second": 124.75,
+      "eval_steps_per_second": 1.001,
+      "step": 2635
+    },
+    {
+      "epoch": 6.0,
+      "learning_rate": 1.81859875183198e-05,
+      "loss": 0.3159,
+      "step": 3162
+    },
+    {
+      "epoch": 6.0,
+      "eval_accuracy": 0.8256880733944955,
+      "eval_loss": 1.179205060005188,
+      "eval_runtime": 6.8143,
+      "eval_samples_per_second": 127.965,
+      "eval_steps_per_second": 1.027,
+      "step": 3162
+    },
+    {
+      "epoch": 7.0,
+      "learning_rate": 0.0,
+      "loss": 0.2938,
+      "step": 3689
+    },
+    {
+      "epoch": 7.0,
+      "eval_accuracy": 0.8222477064220184,
+      "eval_loss": 1.2032159566879272,
+      "eval_runtime": 6.2259,
+      "eval_samples_per_second": 140.06,
+      "eval_steps_per_second": 1.124,
+      "step": 3689
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 3689,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 7,
+  "save_steps": 500,
+  "total_flos": 56642674579560.0,
+  "train_batch_size": 128,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.6271968482895725,
+    "learning_rate": 0.0001273019126282386,
+    "num_train_epochs": 7,
+    "temperature": 11
+  }
+}

run-11/checkpoint-3689/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ff57408053b19dbf32dbe4dd4af251a79a2c99bea6a376004e876c375e910b86
+size 4283

run-11/checkpoint-3689/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-14/checkpoint-527/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.37.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-14/checkpoint-527/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5122f009507ce8a1c7246b2d43e68757db9ae2094fc10ddd7bc9a8e5badf6621
+size 17549312

run-14/checkpoint-527/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a2d184c6cb5a14cdea02045a9cafa91b9427d19a434ae77a268f09df5f4c0bd
+size 35122373

run-14/checkpoint-527/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f943a8d8f93fea7f44cad3b9bfb2d2b97d65b141eb704b9cba669cf42e776b14
+size 13617

run-14/checkpoint-527/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:df9ac9373cf08f38d4a64ae49915a4b6948356b919e0ee772a5d4172bdb4988f
+size 627

run-14/checkpoint-527/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-14/checkpoint-527/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-14/checkpoint-527/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}

run-14/checkpoint-527/trainer_state.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "best_metric": 0.8245412844036697,
+  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-14/checkpoint-527",
+  "epoch": 1.0,
+  "eval_steps": 500,
+  "global_step": 527,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "learning_rate": 0.00019474702690730714,
+      "loss": 0.9795,
+      "step": 527
+    },
+    {
+      "epoch": 1.0,
+      "eval_accuracy": 0.8245412844036697,
+      "eval_loss": 0.915910542011261,
+      "eval_runtime": 6.2219,
+      "eval_samples_per_second": 140.15,
+      "eval_steps_per_second": 1.125,
+      "step": 527
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 4216,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 8,
+  "save_steps": 500,
+  "total_flos": 8069051778960.0,
+  "train_batch_size": 128,
+  "trial_name": null,
+  "trial_params": {
+    "alpha": 0.6859594090871681,
+    "learning_rate": 0.00022256803075120817,
+    "num_train_epochs": 8,
+    "temperature": 29
+  }
+}

run-14/checkpoint-527/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b052a21f86b780f6d7f1d08ff09c042d1d2839789de2a6f00d32705987721a5b
+size 4283

run-14/checkpoint-527/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-16/checkpoint-1581/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "google/bert_uncased_L-2_H-128_A-2",
+  "architectures": [
+    "BertForSequenceClassification"
+  ],
+  "attention_probs_dropout_prob": 0.1,
+  "classifier_dropout": null,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 128,
+  "id2label": {
+    "0": "negative",
+    "1": "positive"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 512,
+  "label2id": {
+    "negative": "0",
+    "positive": "1"
+  },
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "bert",
+  "num_attention_heads": 2,
+  "num_hidden_layers": 2,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.37.1",
+  "type_vocab_size": 2,
+  "use_cache": true,
+  "vocab_size": 30522
+}

run-16/checkpoint-1581/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0f54dcc3139e2bb9fb9042c8ad52729b9f2c4e16b49db76e3dd37bb50c807af3
+size 17549312

run-16/checkpoint-1581/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0435e5ec8a4c4e90acd399141fc5707298fcb3b49ec17f76dc90f87ec1fd8a2
+size 35122373

run-16/checkpoint-1581/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b8f1004ae2205b2fd82b8662eddc31a84c64c086e45e25c12d4a402a3d6a6e4d
+size 13617

run-16/checkpoint-1581/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ceaf0c478b858de5bd3b20cc01e01e1ee9146ac6e467d4a79f2247d045da12a2
+size 627

run-16/checkpoint-1581/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-16/checkpoint-1581/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-16/checkpoint-1581/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,57 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
+}