NairaRahim commited on
Commit
88931ec
·
verified ·
1 Parent(s): ffe5d7c

Training in progress, epoch 32, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2719c12471af591c878a21526618319ad4ac35dce0e07dad1360e72204de1768
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c42a1f9b7d4161ba1bae9d13deadbeb763fe153dec6526f8caefd629b413acef
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:062cca4f6db12b1f947fb25e682653530fbcd8ea11dd9b4ae6ede9bedeb50d81
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:192e681f7b02b75d8744a13995bf0b21ddfec28797c5b2117fc39e50a373e7e3
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d7fa531d7265d7bc31fddb022d4f0400b7f58d98abd17e982bc79b081e31451
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2c4db18d5a66e89d462b11782d22327d30dcce66816316fdc2ddae7a53a0ffe
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4fc7201efed2cc22744d4d8152589ca043a54ef0c596de51adaecf88fdd063e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccb7fdd86b6cca8b702924e4a75b448b78185ab1cd7362e327e12720bd73d09b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 34.4583740234375,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
4
- "epoch": 31.0,
5
  "eval_steps": 500,
6
- "global_step": 40455,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3083,6 +3083,105 @@
3083
  "eval_samples_per_second": 26.415,
3084
  "eval_steps_per_second": 3.32,
3085
  "step": 40455
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3086
  }
3087
  ],
3088
  "logging_steps": 100,
@@ -3097,7 +3196,7 @@
3097
  "early_stopping_threshold": 0.0
3098
  },
3099
  "attributes": {
3100
- "early_stopping_patience_counter": 3
3101
  }
3102
  },
3103
  "TrainerControl": {
@@ -3111,7 +3210,7 @@
3111
  "attributes": {}
3112
  }
3113
  },
3114
- "total_flos": 4.362663799446221e+16,
3115
  "train_batch_size": 8,
3116
  "trial_name": null,
3117
  "trial_params": null
 
1
  {
2
  "best_metric": 34.4583740234375,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
4
+ "epoch": 32.0,
5
  "eval_steps": 500,
6
+ "global_step": 41760,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3083
  "eval_samples_per_second": 26.415,
3084
  "eval_steps_per_second": 3.32,
3085
  "step": 40455
3086
+ },
3087
+ {
3088
+ "epoch": 31.03448275862069,
3089
+ "grad_norm": 3.288548231124878,
3090
+ "learning_rate": 3.061302681992337e-05,
3091
+ "loss": 33.4014,
3092
+ "step": 40500
3093
+ },
3094
+ {
3095
+ "epoch": 31.11111111111111,
3096
+ "grad_norm": 4.078604221343994,
3097
+ "learning_rate": 3.056513409961686e-05,
3098
+ "loss": 33.5796,
3099
+ "step": 40600
3100
+ },
3101
+ {
3102
+ "epoch": 31.18773946360153,
3103
+ "grad_norm": 3.589484691619873,
3104
+ "learning_rate": 3.0517241379310348e-05,
3105
+ "loss": 32.9547,
3106
+ "step": 40700
3107
+ },
3108
+ {
3109
+ "epoch": 31.264367816091955,
3110
+ "grad_norm": 3.1043126583099365,
3111
+ "learning_rate": 3.046934865900383e-05,
3112
+ "loss": 33.2105,
3113
+ "step": 40800
3114
+ },
3115
+ {
3116
+ "epoch": 31.340996168582375,
3117
+ "grad_norm": 2.446356773376465,
3118
+ "learning_rate": 3.0421455938697318e-05,
3119
+ "loss": 33.1642,
3120
+ "step": 40900
3121
+ },
3122
+ {
3123
+ "epoch": 31.417624521072796,
3124
+ "grad_norm": 2.966627597808838,
3125
+ "learning_rate": 3.0373563218390805e-05,
3126
+ "loss": 32.7751,
3127
+ "step": 41000
3128
+ },
3129
+ {
3130
+ "epoch": 31.49425287356322,
3131
+ "grad_norm": 4.547020435333252,
3132
+ "learning_rate": 3.0325670498084292e-05,
3133
+ "loss": 33.8578,
3134
+ "step": 41100
3135
+ },
3136
+ {
3137
+ "epoch": 31.57088122605364,
3138
+ "grad_norm": 3.151139259338379,
3139
+ "learning_rate": 3.0277777777777776e-05,
3140
+ "loss": 33.2976,
3141
+ "step": 41200
3142
+ },
3143
+ {
3144
+ "epoch": 31.64750957854406,
3145
+ "grad_norm": 2.8900582790374756,
3146
+ "learning_rate": 3.0229885057471262e-05,
3147
+ "loss": 33.1161,
3148
+ "step": 41300
3149
+ },
3150
+ {
3151
+ "epoch": 31.724137931034484,
3152
+ "grad_norm": 2.5485446453094482,
3153
+ "learning_rate": 3.0181992337164756e-05,
3154
+ "loss": 33.596,
3155
+ "step": 41400
3156
+ },
3157
+ {
3158
+ "epoch": 31.800766283524904,
3159
+ "grad_norm": 2.5474777221679688,
3160
+ "learning_rate": 3.0134099616858243e-05,
3161
+ "loss": 33.3569,
3162
+ "step": 41500
3163
+ },
3164
+ {
3165
+ "epoch": 31.877394636015325,
3166
+ "grad_norm": 3.6182713508605957,
3167
+ "learning_rate": 3.0086206896551726e-05,
3168
+ "loss": 32.824,
3169
+ "step": 41600
3170
+ },
3171
+ {
3172
+ "epoch": 31.95402298850575,
3173
+ "grad_norm": 3.898332118988037,
3174
+ "learning_rate": 3.0038314176245213e-05,
3175
+ "loss": 32.8775,
3176
+ "step": 41700
3177
+ },
3178
+ {
3179
+ "epoch": 32.0,
3180
+ "eval_loss": 34.500526428222656,
3181
+ "eval_runtime": 49.4041,
3182
+ "eval_samples_per_second": 26.415,
3183
+ "eval_steps_per_second": 3.32,
3184
+ "step": 41760
3185
  }
3186
  ],
3187
  "logging_steps": 100,
 
3196
  "early_stopping_threshold": 0.0
3197
  },
3198
  "attributes": {
3199
+ "early_stopping_patience_counter": 4
3200
  }
3201
  },
3202
  "TrainerControl": {
 
3210
  "attributes": {}
3211
  }
3212
  },
3213
+ "total_flos": 4.503394889750938e+16,
3214
  "train_batch_size": 8,
3215
  "trial_name": null,
3216
  "trial_params": null