stuser2023 committed on Mar 16

Commit

772709a

•

1 Parent(s): 0ef2dca

Training in progress, epoch 4

Browse files

Files changed (24) hide show

model.safetensors +1 -1
run-2/checkpoint-6414/config.json +25 -0
run-2/checkpoint-6414/model.safetensors +3 -0
run-2/checkpoint-6414/optimizer.pt +3 -0
run-2/checkpoint-6414/rng_state.pth +3 -0
run-2/checkpoint-6414/scheduler.pt +3 -0
run-2/checkpoint-6414/special_tokens_map.json +7 -0
run-2/checkpoint-6414/tokenizer.json +0 -0
run-2/checkpoint-6414/tokenizer_config.json +55 -0
run-2/checkpoint-6414/trainer_state.json +137 -0
run-2/checkpoint-6414/training_args.bin +3 -0
run-2/checkpoint-6414/vocab.txt +0 -0
run-2/checkpoint-8552/config.json +25 -0
run-2/checkpoint-8552/model.safetensors +3 -0
run-2/checkpoint-8552/optimizer.pt +3 -0
run-2/checkpoint-8552/rng_state.pth +3 -0
run-2/checkpoint-8552/scheduler.pt +3 -0
run-2/checkpoint-8552/special_tokens_map.json +7 -0
run-2/checkpoint-8552/tokenizer.json +0 -0
run-2/checkpoint-8552/tokenizer_config.json +55 -0
run-2/checkpoint-8552/trainer_state.json +181 -0
run-2/checkpoint-8552/training_args.bin +3 -0
run-2/checkpoint-8552/vocab.txt +0 -0
runs/Mar16_01-46-29_6e5f088ca464/events.out.tfevents.1710554238.6e5f088ca464.226.4 +2 -2

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:61c63014982a28f5ffe041a72186378c0eae87ab0c43e6e179f1786e0faeb3da
+oid sha256:9c607fa5c2cbe8d1e975d4269b9024790aaf45ba1c82c80025876d29d4c01c52
 size 267832560

run-2/checkpoint-6414/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-2/checkpoint-6414/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:61c63014982a28f5ffe041a72186378c0eae87ab0c43e6e179f1786e0faeb3da
+size 267832560

run-2/checkpoint-6414/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9cb82dd2f2f702887d8456a4104a59e3258708973cb878b4949640972f84feb5
+size 535727290

run-2/checkpoint-6414/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f47a0d767e132afa0311bac58a05938e81c1d9717b7e03ea0561e7524ccde0a1
+size 14244

run-2/checkpoint-6414/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:18092bfc2eecb6099014e0f84c6e33b160a2dd7f7997ff5ac899bba803e7215f
+size 1064

run-2/checkpoint-6414/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-6414/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-6414/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-6414/trainer_state.json ADDED Viewed

	@@ -0,0 +1,137 @@

+{
+  "best_metric": 0.4691032179514943,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-4276",
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 6414,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.23,
+      "grad_norm": 4.28505277633667,
+      "learning_rate": 3.0702893894484785e-06,
+      "loss": 0.6069,
+      "step": 500
+    },
+    {
+      "epoch": 0.47,
+      "grad_norm": 9.482794761657715,
+      "learning_rate": 2.9196373094951675e-06,
+      "loss": 0.5628,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 22.521339416503906,
+      "learning_rate": 2.7689852295418565e-06,
+      "loss": 0.5565,
+      "step": 1500
+    },
+    {
+      "epoch": 0.94,
+      "grad_norm": 26.7753849029541,
+      "learning_rate": 2.6183331495885454e-06,
+      "loss": 0.5184,
+      "step": 2000
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5730993747711182,
+      "eval_matthews_correlation": 0.3853198145814999,
+      "eval_runtime": 0.7612,
+      "eval_samples_per_second": 1370.225,
+      "eval_steps_per_second": 86.706,
+      "step": 2138
+    },
+    {
+      "epoch": 1.17,
+      "grad_norm": 17.77669334411621,
+      "learning_rate": 2.4676810696352344e-06,
+      "loss": 0.4619,
+      "step": 2500
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 37.4239387512207,
+      "learning_rate": 2.3170289896819234e-06,
+      "loss": 0.5014,
+      "step": 3000
+    },
+    {
+      "epoch": 1.64,
+      "grad_norm": 46.75569534301758,
+      "learning_rate": 2.1663769097286124e-06,
+      "loss": 0.492,
+      "step": 3500
+    },
+    {
+      "epoch": 1.87,
+      "grad_norm": 66.9134750366211,
+      "learning_rate": 2.0157248297753013e-06,
+      "loss": 0.4809,
+      "step": 4000
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6646500825881958,
+      "eval_matthews_correlation": 0.4691032179514943,
+      "eval_runtime": 0.8224,
+      "eval_samples_per_second": 1268.193,
+      "eval_steps_per_second": 80.25,
+      "step": 4276
+    },
+    {
+      "epoch": 2.1,
+      "grad_norm": 11.169896125793457,
+      "learning_rate": 1.8650727498219905e-06,
+      "loss": 0.4934,
+      "step": 4500
+    },
+    {
+      "epoch": 2.34,
+      "grad_norm": 64.01177215576172,
+      "learning_rate": 1.7144206698686793e-06,
+      "loss": 0.4653,
+      "step": 5000
+    },
+    {
+      "epoch": 2.57,
+      "grad_norm": 0.48781171441078186,
+      "learning_rate": 1.5637685899153684e-06,
+      "loss": 0.4639,
+      "step": 5500
+    },
+    {
+      "epoch": 2.81,
+      "grad_norm": 0.21301893889904022,
+      "learning_rate": 1.4131165099620574e-06,
+      "loss": 0.514,
+      "step": 6000
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.8122562766075134,
+      "eval_matthews_correlation": 0.44860917123689154,
+      "eval_runtime": 0.7584,
+      "eval_samples_per_second": 1375.339,
+      "eval_steps_per_second": 87.03,
+      "step": 6414
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 10690,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 97759387855800.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 3.2209414694017896e-06,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 16
+  }
+}

run-2/checkpoint-6414/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88d2c3d6804ca2d9d22cb74f328c5ae8ec320f8d12a0ef15ea5ae2037f02bd85
+size 4984

run-2/checkpoint-6414/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-8552/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "distilbert-base-uncased",
+  "activation": "gelu",
+  "architectures": [
+    "DistilBertForSequenceClassification"
+  ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
+  "initializer_range": 0.02,
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "pad_token_id": 0,
+  "problem_type": "single_label_classification",
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": false,
+  "tie_weights_": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.38.2",
+  "vocab_size": 30522
+}

run-2/checkpoint-8552/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9c607fa5c2cbe8d1e975d4269b9024790aaf45ba1c82c80025876d29d4c01c52
+size 267832560

run-2/checkpoint-8552/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:118f092dcd6d30aba59a25571af08b784e0c0894555ffb0db5c2fcbf50fbbec1
+size 535727290

run-2/checkpoint-8552/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1e31580ec7d008cae7a2e04bd2b7fc17b30cf1be7c8abc85ff834da89497307f
+size 14244

run-2/checkpoint-8552/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4c4497dc3d4c493b8895dceac58d5ad0927731d3eb42310bfba2246f01247ca8
+size 1064

run-2/checkpoint-8552/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-8552/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

run-2/checkpoint-8552/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,55 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "[PAD]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100": {
+      "content": "[UNK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "101": {
+      "content": "[CLS]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "102": {
+      "content": "[SEP]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "DistilBertTokenizer",
+  "unk_token": "[UNK]"
+}

run-2/checkpoint-8552/trainer_state.json ADDED Viewed

	@@ -0,0 +1,181 @@

+{
+  "best_metric": 0.4779281382373973,
+  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-8552",
+  "epoch": 4.0,
+  "eval_steps": 500,
+  "global_step": 8552,
+  "is_hyper_param_search": true,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.23,
+      "grad_norm": 4.28505277633667,
+      "learning_rate": 3.0702893894484785e-06,
+      "loss": 0.6069,
+      "step": 500
+    },
+    {
+      "epoch": 0.47,
+      "grad_norm": 9.482794761657715,
+      "learning_rate": 2.9196373094951675e-06,
+      "loss": 0.5628,
+      "step": 1000
+    },
+    {
+      "epoch": 0.7,
+      "grad_norm": 22.521339416503906,
+      "learning_rate": 2.7689852295418565e-06,
+      "loss": 0.5565,
+      "step": 1500
+    },
+    {
+      "epoch": 0.94,
+      "grad_norm": 26.7753849029541,
+      "learning_rate": 2.6183331495885454e-06,
+      "loss": 0.5184,
+      "step": 2000
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.5730993747711182,
+      "eval_matthews_correlation": 0.3853198145814999,
+      "eval_runtime": 0.7612,
+      "eval_samples_per_second": 1370.225,
+      "eval_steps_per_second": 86.706,
+      "step": 2138
+    },
+    {
+      "epoch": 1.17,
+      "grad_norm": 17.77669334411621,
+      "learning_rate": 2.4676810696352344e-06,
+      "loss": 0.4619,
+      "step": 2500
+    },
+    {
+      "epoch": 1.4,
+      "grad_norm": 37.4239387512207,
+      "learning_rate": 2.3170289896819234e-06,
+      "loss": 0.5014,
+      "step": 3000
+    },
+    {
+      "epoch": 1.64,
+      "grad_norm": 46.75569534301758,
+      "learning_rate": 2.1663769097286124e-06,
+      "loss": 0.492,
+      "step": 3500
+    },
+    {
+      "epoch": 1.87,
+      "grad_norm": 66.9134750366211,
+      "learning_rate": 2.0157248297753013e-06,
+      "loss": 0.4809,
+      "step": 4000
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.6646500825881958,
+      "eval_matthews_correlation": 0.4691032179514943,
+      "eval_runtime": 0.8224,
+      "eval_samples_per_second": 1268.193,
+      "eval_steps_per_second": 80.25,
+      "step": 4276
+    },
+    {
+      "epoch": 2.1,
+      "grad_norm": 11.169896125793457,
+      "learning_rate": 1.8650727498219905e-06,
+      "loss": 0.4934,
+      "step": 4500
+    },
+    {
+      "epoch": 2.34,
+      "grad_norm": 64.01177215576172,
+      "learning_rate": 1.7144206698686793e-06,
+      "loss": 0.4653,
+      "step": 5000
+    },
+    {
+      "epoch": 2.57,
+      "grad_norm": 0.48781171441078186,
+      "learning_rate": 1.5637685899153684e-06,
+      "loss": 0.4639,
+      "step": 5500
+    },
+    {
+      "epoch": 2.81,
+      "grad_norm": 0.21301893889904022,
+      "learning_rate": 1.4131165099620574e-06,
+      "loss": 0.514,
+      "step": 6000
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.8122562766075134,
+      "eval_matthews_correlation": 0.44860917123689154,
+      "eval_runtime": 0.7584,
+      "eval_samples_per_second": 1375.339,
+      "eval_steps_per_second": 87.03,
+      "step": 6414
+    },
+    {
+      "epoch": 3.04,
+      "grad_norm": 76.10978698730469,
+      "learning_rate": 1.2624644300087464e-06,
+      "loss": 0.5175,
+      "step": 6500
+    },
+    {
+      "epoch": 3.27,
+      "grad_norm": 13.475150108337402,
+      "learning_rate": 1.1118123500554353e-06,
+      "loss": 0.4448,
+      "step": 7000
+    },
+    {
+      "epoch": 3.51,
+      "grad_norm": 0.116688072681427,
+      "learning_rate": 9.611602701021243e-07,
+      "loss": 0.4159,
+      "step": 7500
+    },
+    {
+      "epoch": 3.74,
+      "grad_norm": 0.9807508587837219,
+      "learning_rate": 8.105081901488134e-07,
+      "loss": 0.4664,
+      "step": 8000
+    },
+    {
+      "epoch": 3.98,
+      "grad_norm": 0.25140833854675293,
+      "learning_rate": 6.598561101955022e-07,
+      "loss": 0.4243,
+      "step": 8500
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 0.8493317365646362,
+      "eval_matthews_correlation": 0.4779281382373973,
+      "eval_runtime": 0.8106,
+      "eval_samples_per_second": 1286.694,
+      "eval_steps_per_second": 81.421,
+      "step": 8552
+    }
+  ],
+  "logging_steps": 500,
+  "max_steps": 10690,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "total_flos": 138682241201148.0,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": {
+    "learning_rate": 3.2209414694017896e-06,
+    "num_train_epochs": 5,
+    "per_device_train_batch_size": 4,
+    "seed": 16
+  }
+}

run-2/checkpoint-8552/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:88d2c3d6804ca2d9d22cb74f328c5ae8ec320f8d12a0ef15ea5ae2037f02bd85
+size 4984

run-2/checkpoint-8552/vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

runs/Mar16_01-46-29_6e5f088ca464/events.out.tfevents.1710554238.6e5f088ca464.226.4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ec5a9a14b9577deef8355652db8864f3bf7dae2e10f27c1880a789a8aac893a5
-size 8548
+oid sha256:a1d3d5d136c241945fd96f4c7140c4735311875e6e9a1f29a0e85d8f1ba39a7b
+size 9938