Model save

Browse files

Files changed (5) hide show

README.md +65 -33
repo_card.md +6 -1
tokenizer.json +3 -5
trainer_state.json +17 -17
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -1,36 +1,68 @@
 ---
-{}
 ---
-    ---
-    language: en
-    license: mit
-    ---
-    # distilbert-finetuned-uncased Model
-    This model is fine-tuned on the SQuAD v2 dataset for the task of question answering.
-    ## Training Procedure
-    - Number of Epochs: 4
-    - Learning Rate: 2e-05
-    - Batch Size: 128 (per device)
-    - Evaluation Strategy: Every 100 steps
-    - Save Strategy: Every 100 steps
-    - FP16 Training: Yes
-    ## Evaluation Results
-    - Exact Match: 23.347090036216628
-    - F1 Score: 26.869992349988973
-    - Total: 11873
-    - Has Answer Exact: 38.630229419703106
-    - Has Answer F1: 45.686136837283904
-    - Has Answer Total: 5928
-    - No Answer Exact: 8.107653490328007
-    - No Answer F1: 8.107653490328007
-    - No Answer Total: 5945
-    - Best Exact: 50.11370336056599
-    - Best Exact Threshold: 0.0
-    - Best F1: 50.11370336056599
-    - Best F1 Threshold: 0.0

 ---
+tags:
+- generated_from_trainer
+datasets:
+- squad_v2
+model-index:
+- name: distilbert-finetuned-uncased-squad_v2
+  results: []
 ---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# distilbert-finetuned-uncased-squad_v2
+This model is a DistilBERT (uncased) model fine-tuned on the squad_v2 dataset for question answering.
+It achieves the following results on the evaluation set:
+- Loss: 1.3930
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-05
+- train_batch_size: 128
+- eval_batch_size: 128
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 512
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 4
+### Training results
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:-----:|:----:|:---------------:|
+| 3.6437        | 0.39  | 100  | 2.1780          |
+| 2.1596        | 0.78  | 200  | 1.6557          |
+| 1.8138        | 1.18  | 300  | 1.5683          |
+| 1.6987        | 1.57  | 400  | 1.5076          |
+| 1.6586        | 1.96  | 500  | 1.5350          |
+| 1.5957        | 1.18  | 600  | 1.4431          |
+| 1.5825        | 1.37  | 700  | 1.4955          |
+| 1.5523        | 1.57  | 800  | 1.4444          |
+| 1.5346        | 1.76  | 900  | 1.3930          |
+| 1.5098        | 1.96  | 1000 | 1.4285          |
+### Framework versions
+- Transformers 4.34.1
+- Pytorch 2.1.0+cu118
+- Datasets 2.14.5
+- Tokenizers 0.14.1

repo_card.md CHANGED Viewed

@@ -2,12 +2,17 @@
 {}
 ---
     # distilbert-finetuned-uncased Model
     This model is fine-tuned on the SQuAD v2 dataset for the task of question answering.
     ## Training Procedure
-    - Number of Epochs: 2
     - Learning Rate: 2e-05
     - Batch Size: 128 (per device)
     - Evaluation Strategy: Every 100 steps

 {}
 ---
+    ---
+    language: en
+    license: mit
+    ---
     # distilbert-finetuned-uncased Model
     This model is fine-tuned on the SQuAD v2 dataset for the task of question answering.
     ## Training Procedure
+    - Number of Epochs: 4
     - Learning Rate: 2e-05
     - Batch Size: 128 (per device)
     - Evaluation Strategy: Every 100 steps

tokenizer.json CHANGED Viewed

@@ -3,13 +3,11 @@
   "truncation": {
     "direction": "Right",
     "max_length": 512,
-    "strategy": "LongestFirst",
-    "stride": 0
   },
   "padding": {
-    "strategy": {
-      "Fixed": 512
-    },
     "direction": "Right",
     "pad_to_multiple_of": null,
     "pad_id": 0,

   "truncation": {
     "direction": "Right",
     "max_length": 512,
+    "strategy": "OnlySecond",
+    "stride": 128
   },
   "padding": {
+    "strategy": "BatchLongest",
     "direction": "Right",
     "pad_to_multiple_of": null,
     "pad_id": 0,

trainer_state.json CHANGED Viewed

@@ -1,9 +1,9 @@
 {
   "best_metric": 1.393009066581726,
   "best_model_checkpoint": "/content/drive/My Drive/Colab Notebooks/aai520-project/checkpoints/distilbert-finetuned-uncased/checkpoint-900",
-  "epoch": 1.9607843137254903,
   "eval_steps": 100,
-  "global_step": 1000,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -149,28 +149,28 @@
       "step": 1000
     },
     {
-      "epoch": 1.96,
-      "step": 1000,
-      "total_flos": 5.015589595888435e+16,
-      "train_loss": 0.0,
-      "train_runtime": 0.3572,
-      "train_samples_per_second": 730670.816,
-      "train_steps_per_second": 1427.715
     },
     {
-      "epoch": 1.96,
       "eval_loss": 1.3930128812789917,
-      "eval_runtime": 8.2864,
-      "eval_samples_per_second": 1444.423,
-      "eval_steps_per_second": 11.344,
-      "step": 1000
     }
   ],
   "logging_steps": 100,
-  "max_steps": 510,
-  "num_train_epochs": 2,
   "save_steps": 100,
-  "total_flos": 5.015589595888435e+16,
   "trial_name": null,
   "trial_params": null
 }

 {
   "best_metric": 1.393009066581726,
   "best_model_checkpoint": "/content/drive/My Drive/Colab Notebooks/aai520-project/checkpoints/distilbert-finetuned-uncased/checkpoint-900",
+  "epoch": 4.0,
   "eval_steps": 100,
+  "global_step": 1020,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
       "step": 1000
     },
     {
+      "epoch": 4.0,
+      "step": 1020,
+      "total_flos": 5.148633647651021e+16,
+      "train_loss": 0.028946983113008386,
+      "train_runtime": 27.4849,
+      "train_samples_per_second": 18992.679,
+      "train_steps_per_second": 37.111
     },
     {
+      "epoch": 4.0,
       "eval_loss": 1.3930128812789917,
+      "eval_runtime": 8.2728,
+      "eval_samples_per_second": 1446.798,
+      "eval_steps_per_second": 11.363,
+      "step": 1020
     }
   ],
   "logging_steps": 100,
+  "max_steps": 1020,
+  "num_train_epochs": 4,
   "save_steps": 100,
+  "total_flos": 5.148633647651021e+16,
   "trial_name": null,
   "trial_params": null
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c2218707d8f17a87a80bd2f04a5dd940a8048c67f7e922aee33e6506357a060
 size 4664

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c7ae8b9eb8e6eebcf1c334060dd09414a7a9d6f7ee56c74412375c0cdf85353
 size 4664