NairaRahim committed on
Commit
a908def
·
verified ·
1 Parent(s): 86a71c0

Training in progress, epoch 31, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2803b5ea78a77c33b077e0060ee73753f95d174d834b993bc91485092dcde1c9
3
  size 1227009528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2719c12471af591c878a21526618319ad4ac35dce0e07dad1360e72204de1768
3
  size 1227009528
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32e5494189f649e0e13e6f06d32a81e216f34cb7f0ab96e7dcedf6d8cc0cf2ad
3
  size 2454133690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:062cca4f6db12b1f947fb25e682653530fbcd8ea11dd9b4ae6ede9bedeb50d81
3
  size 2454133690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:14e90b56db58b69cb5e05f5a06dece018d9fd278779eef8662306f8a599fad84
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d7fa531d7265d7bc31fddb022d4f0400b7f58d98abd17e982bc79b081e31451
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f3041d0db6b547a1562ecb7021cfeb0bcc92669d8d944852d4251894eeac567
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4fc7201efed2cc22744d4d8152589ca043a54ef0c596de51adaecf88fdd063e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 34.4583740234375,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
4
- "epoch": 30.0,
5
  "eval_steps": 500,
6
- "global_step": 39150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2984,6 +2984,105 @@
2984
  "eval_samples_per_second": 26.403,
2985
  "eval_steps_per_second": 3.318,
2986
  "step": 39150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2987
  }
2988
  ],
2989
  "logging_steps": 100,
@@ -2998,7 +3097,7 @@
2998
  "early_stopping_threshold": 0.0
2999
  },
3000
  "attributes": {
3001
- "early_stopping_patience_counter": 2
3002
  }
3003
  },
3004
  "TrainerControl": {
@@ -3012,7 +3111,7 @@
3012
  "attributes": {}
3013
  }
3014
  },
3015
- "total_flos": 4.221932709141504e+16,
3016
  "train_batch_size": 8,
3017
  "trial_name": null,
3018
  "trial_params": null
 
1
  {
2
  "best_metric": 34.4583740234375,
3
  "best_model_checkpoint": "/kaggle/working/output/checkpoint-36540",
4
+ "epoch": 31.0,
5
  "eval_steps": 500,
6
+ "global_step": 40455,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2984
  "eval_samples_per_second": 26.403,
2985
  "eval_steps_per_second": 3.318,
2986
  "step": 39150
2987
+ },
2988
+ {
2989
+ "epoch": 30.038314176245212,
2990
+ "grad_norm": 4.915451526641846,
2991
+ "learning_rate": 3.123467432950192e-05,
2992
+ "loss": 33.0369,
2993
+ "step": 39200
2994
+ },
2995
+ {
2996
+ "epoch": 30.114942528735632,
2997
+ "grad_norm": 4.369636058807373,
2998
+ "learning_rate": 3.1186781609195405e-05,
2999
+ "loss": 33.1459,
3000
+ "step": 39300
3001
+ },
3002
+ {
3003
+ "epoch": 30.191570881226053,
3004
+ "grad_norm": 2.9162957668304443,
3005
+ "learning_rate": 3.113888888888889e-05,
3006
+ "loss": 32.9688,
3007
+ "step": 39400
3008
+ },
3009
+ {
3010
+ "epoch": 30.268199233716476,
3011
+ "grad_norm": 4.7777628898620605,
3012
+ "learning_rate": 3.109099616858238e-05,
3013
+ "loss": 33.7249,
3014
+ "step": 39500
3015
+ },
3016
+ {
3017
+ "epoch": 30.344827586206897,
3018
+ "grad_norm": 3.651850700378418,
3019
+ "learning_rate": 3.104310344827586e-05,
3020
+ "loss": 33.4887,
3021
+ "step": 39600
3022
+ },
3023
+ {
3024
+ "epoch": 30.421455938697317,
3025
+ "grad_norm": 3.29491925239563,
3026
+ "learning_rate": 3.0995210727969346e-05,
3027
+ "loss": 33.5714,
3028
+ "step": 39700
3029
+ },
3030
+ {
3031
+ "epoch": 30.49808429118774,
3032
+ "grad_norm": 3.9116616249084473,
3033
+ "learning_rate": 3.094731800766283e-05,
3034
+ "loss": 33.7763,
3035
+ "step": 39800
3036
+ },
3037
+ {
3038
+ "epoch": 30.57471264367816,
3039
+ "grad_norm": Infinity,
3040
+ "learning_rate": 3.089990421455939e-05,
3041
+ "loss": 32.1907,
3042
+ "step": 39900
3043
+ },
3044
+ {
3045
+ "epoch": 30.65134099616858,
3046
+ "grad_norm": 3.237652063369751,
3047
+ "learning_rate": 3.085201149425287e-05,
3048
+ "loss": 33.344,
3049
+ "step": 40000
3050
+ },
3051
+ {
3052
+ "epoch": 30.727969348659006,
3053
+ "grad_norm": 4.286235809326172,
3054
+ "learning_rate": 3.080459770114943e-05,
3055
+ "loss": 33.181,
3056
+ "step": 40100
3057
+ },
3058
+ {
3059
+ "epoch": 30.804597701149426,
3060
+ "grad_norm": 2.6222527027130127,
3061
+ "learning_rate": 3.075670498084292e-05,
3062
+ "loss": 33.3407,
3063
+ "step": 40200
3064
+ },
3065
+ {
3066
+ "epoch": 30.881226053639846,
3067
+ "grad_norm": 3.7431180477142334,
3068
+ "learning_rate": 3.0708812260536404e-05,
3069
+ "loss": 33.1109,
3070
+ "step": 40300
3071
+ },
3072
+ {
3073
+ "epoch": 30.957854406130267,
3074
+ "grad_norm": 3.0706677436828613,
3075
+ "learning_rate": 3.066091954022989e-05,
3076
+ "loss": 33.3504,
3077
+ "step": 40400
3078
+ },
3079
+ {
3080
+ "epoch": 31.0,
3081
+ "eval_loss": 34.48047637939453,
3082
+ "eval_runtime": 49.4044,
3083
+ "eval_samples_per_second": 26.415,
3084
+ "eval_steps_per_second": 3.32,
3085
+ "step": 40455
3086
  }
3087
  ],
3088
  "logging_steps": 100,
 
3097
  "early_stopping_threshold": 0.0
3098
  },
3099
  "attributes": {
3100
+ "early_stopping_patience_counter": 3
3101
  }
3102
  },
3103
  "TrainerControl": {
 
3111
  "attributes": {}
3112
  }
3113
  },
3114
+ "total_flos": 4.362663799446221e+16,
3115
  "train_batch_size": 8,
3116
  "trial_name": null,
3117
  "trial_params": null