mtzig commited on
Commit
b407277
·
verified ·
1 Parent(s): 1b53c31

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58970c7b1499a51e52f13d34fc84f1be155b729c5dc01535b4fd3b471893cc7c
3
  size 13648688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14728c0dda1002b24225273f56807d0922c03feb73afbc3c9a2b84f92ba99853
3
  size 13648688
last-checkpoint/global_step500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:945c95b87cfa9a4247bbc67dec060d40f98d220efeb8d7bfea909d9f47efb568
3
+ size 20450800
last-checkpoint/global_step500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dae0e38e3cbaf2a462484d1476859c6d7867323c4af89510eb89d5c9d08aed63
3
+ size 20450800
last-checkpoint/global_step500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5e026b69b255260812d62d318537ba4640d841fca11b458b154d3a31b03c0af
3
+ size 20450800
last-checkpoint/global_step500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ec4962a539f87f3a90b500e4e367939ad8029ef0f09c092166c135e5280ad6d
3
+ size 20450800
last-checkpoint/global_step500/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:069c1f3451602986b71542c8dfb178a4f03afe6c9b0c4ee35743ce4c002eee59
3
+ size 152238
last-checkpoint/global_step500/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfa039f9de21be98beb1eed1119437f237d6f512e1b762fe0af46a5e4792c3a3
3
+ size 152238
last-checkpoint/global_step500/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe9a3416e998f9b31c1d15894e49a2114183977f218cd3bfba00c3afc6923016
3
+ size 152238
last-checkpoint/global_step500/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29695eef268967bbc32ca3feaf63f2a35ba3ac928a655cbd7d49855450e24ca3
3
+ size 152238
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step400
 
1
+ global_step500
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abaf0e70a6661521eb40188cdadbb09fcda9f6e1ac539eef99db2b1bc5a7ba52
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f1c62d3591874d9423058ca8b4ee1529ba22e4a464cd7a05c124170a54d10f7
3
  size 14960
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:376c466d70aa79c6b0bb9fc6cc87d2e449a16493d5d1155107e37872dcdc22dc
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:769adabc2930b4c5ffaf06e4c14dcf6bc475cf3777b500b26f6eee3ded6fabd2
3
  size 14960
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:865c5a6a69a0b6acfd26560edcb10f0694871429483ae64bee81aba12e73a0b0
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1bbf4e280aa45bd7ecd510773cee429432a2cf8a426b57fcb1765c6e16762915
3
  size 14960
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8848755180edfd38eee9486edbe1a58572435a9c200f8a462726bb43540dcbf5
3
  size 14960
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f776eff2202474e4c474585023a50e014369bc49fa31bbd19616214366af09f8
3
  size 14960
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b217a5016949cb5cd10bb5c4b090e10b845f27963a84cf5bdc1f1d94facb5b3c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b3f9425ed43b96686ab768302a7b350060720823822160ec86dcae8519a36a4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.49185367353212417,
5
  "eval_steps": 40,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2939,6 +2939,730 @@
2939
  "eval_samples_per_second": 2.201,
2940
  "eval_steps_per_second": 0.176,
2941
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2942
  }
2943
  ],
2944
  "logging_steps": 1,
@@ -2958,7 +3682,7 @@
2958
  "attributes": {}
2959
  }
2960
  },
2961
- "total_flos": 393623730028544.0,
2962
  "train_batch_size": 4,
2963
  "trial_name": null,
2964
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6148170919151552,
5
  "eval_steps": 40,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2939
  "eval_samples_per_second": 2.201,
2940
  "eval_steps_per_second": 0.176,
2941
  "step": 400
2942
+ },
2943
+ {
2944
+ "epoch": 0.4930833077159545,
2945
+ "grad_norm": 0.4925378998198651,
2946
+ "learning_rate": 1.1985138830231638e-05,
2947
+ "loss": 0.2452,
2948
+ "step": 401
2949
+ },
2950
+ {
2951
+ "epoch": 0.4943129418997848,
2952
+ "grad_norm": 0.43350371819373273,
2953
+ "learning_rate": 1.1942999301461694e-05,
2954
+ "loss": 0.2145,
2955
+ "step": 402
2956
+ },
2957
+ {
2958
+ "epoch": 0.4955425760836151,
2959
+ "grad_norm": 0.4788710711794286,
2960
+ "learning_rate": 1.1900823885707216e-05,
2961
+ "loss": 0.2355,
2962
+ "step": 403
2963
+ },
2964
+ {
2965
+ "epoch": 0.49677221026744545,
2966
+ "grad_norm": 0.48489068989863693,
2967
+ "learning_rate": 1.1858613361943518e-05,
2968
+ "loss": 0.1636,
2969
+ "step": 404
2970
+ },
2971
+ {
2972
+ "epoch": 0.49800184445127577,
2973
+ "grad_norm": 0.38739819249024443,
2974
+ "learning_rate": 1.1816368509794365e-05,
2975
+ "loss": 0.1768,
2976
+ "step": 405
2977
+ },
2978
+ {
2979
+ "epoch": 0.49923147863510603,
2980
+ "grad_norm": 0.6455001321481116,
2981
+ "learning_rate": 1.177409010951755e-05,
2982
+ "loss": 0.2625,
2983
+ "step": 406
2984
+ },
2985
+ {
2986
+ "epoch": 0.5004611128189363,
2987
+ "grad_norm": 0.6173490831722124,
2988
+ "learning_rate": 1.1731778941990497e-05,
2989
+ "loss": 0.1915,
2990
+ "step": 407
2991
+ },
2992
+ {
2993
+ "epoch": 0.5016907470027667,
2994
+ "grad_norm": 0.4814901038602925,
2995
+ "learning_rate": 1.1689435788695844e-05,
2996
+ "loss": 0.1646,
2997
+ "step": 408
2998
+ },
2999
+ {
3000
+ "epoch": 0.502920381186597,
3001
+ "grad_norm": 0.4617832240658794,
3002
+ "learning_rate": 1.1647061431707e-05,
3003
+ "loss": 0.1671,
3004
+ "step": 409
3005
+ },
3006
+ {
3007
+ "epoch": 0.5041500153704273,
3008
+ "grad_norm": 0.31842872427334545,
3009
+ "learning_rate": 1.1604656653673707e-05,
3010
+ "loss": 0.1587,
3011
+ "step": 410
3012
+ },
3013
+ {
3014
+ "epoch": 0.5053796495542576,
3015
+ "grad_norm": 0.4310369489277566,
3016
+ "learning_rate": 1.156222223780757e-05,
3017
+ "loss": 0.2058,
3018
+ "step": 411
3019
+ },
3020
+ {
3021
+ "epoch": 0.5066092837380879,
3022
+ "grad_norm": 0.3434568907080096,
3023
+ "learning_rate": 1.1519758967867608e-05,
3024
+ "loss": 0.1748,
3025
+ "step": 412
3026
+ },
3027
+ {
3028
+ "epoch": 0.5078389179219183,
3029
+ "grad_norm": 0.3212603412265404,
3030
+ "learning_rate": 1.1477267628145777e-05,
3031
+ "loss": 0.1998,
3032
+ "step": 413
3033
+ },
3034
+ {
3035
+ "epoch": 0.5090685521057485,
3036
+ "grad_norm": 0.3727679962149267,
3037
+ "learning_rate": 1.1434749003452467e-05,
3038
+ "loss": 0.1962,
3039
+ "step": 414
3040
+ },
3041
+ {
3042
+ "epoch": 0.5102981862895789,
3043
+ "grad_norm": 0.5738583382215159,
3044
+ "learning_rate": 1.1392203879102027e-05,
3045
+ "loss": 0.1792,
3046
+ "step": 415
3047
+ },
3048
+ {
3049
+ "epoch": 0.5115278204734092,
3050
+ "grad_norm": 0.3242461291494194,
3051
+ "learning_rate": 1.1349633040898246e-05,
3052
+ "loss": 0.1407,
3053
+ "step": 416
3054
+ },
3055
+ {
3056
+ "epoch": 0.5127574546572394,
3057
+ "grad_norm": 0.39000456676333345,
3058
+ "learning_rate": 1.1307037275119854e-05,
3059
+ "loss": 0.1644,
3060
+ "step": 417
3061
+ },
3062
+ {
3063
+ "epoch": 0.5139870888410698,
3064
+ "grad_norm": 0.3257334260528596,
3065
+ "learning_rate": 1.1264417368505981e-05,
3066
+ "loss": 0.2097,
3067
+ "step": 418
3068
+ },
3069
+ {
3070
+ "epoch": 0.5152167230249001,
3071
+ "grad_norm": 0.37892769382651426,
3072
+ "learning_rate": 1.1221774108241646e-05,
3073
+ "loss": 0.1604,
3074
+ "step": 419
3075
+ },
3076
+ {
3077
+ "epoch": 0.5164463572087304,
3078
+ "grad_norm": 0.424302046031257,
3079
+ "learning_rate": 1.117910828194319e-05,
3080
+ "loss": 0.1472,
3081
+ "step": 420
3082
+ },
3083
+ {
3084
+ "epoch": 0.5176759913925607,
3085
+ "grad_norm": 0.36620159075013065,
3086
+ "learning_rate": 1.1136420677643763e-05,
3087
+ "loss": 0.1656,
3088
+ "step": 421
3089
+ },
3090
+ {
3091
+ "epoch": 0.518905625576391,
3092
+ "grad_norm": 0.3640140474940355,
3093
+ "learning_rate": 1.1093712083778748e-05,
3094
+ "loss": 0.1676,
3095
+ "step": 422
3096
+ },
3097
+ {
3098
+ "epoch": 0.5201352597602213,
3099
+ "grad_norm": 0.5095089795756096,
3100
+ "learning_rate": 1.1050983289171195e-05,
3101
+ "loss": 0.2301,
3102
+ "step": 423
3103
+ },
3104
+ {
3105
+ "epoch": 0.5213648939440516,
3106
+ "grad_norm": 0.3606729676571787,
3107
+ "learning_rate": 1.1008235083017272e-05,
3108
+ "loss": 0.1519,
3109
+ "step": 424
3110
+ },
3111
+ {
3112
+ "epoch": 0.522594528127882,
3113
+ "grad_norm": 0.6045563512444339,
3114
+ "learning_rate": 1.096546825487167e-05,
3115
+ "loss": 0.2366,
3116
+ "step": 425
3117
+ },
3118
+ {
3119
+ "epoch": 0.5238241623117122,
3120
+ "grad_norm": 0.33927673864133545,
3121
+ "learning_rate": 1.092268359463302e-05,
3122
+ "loss": 0.2344,
3123
+ "step": 426
3124
+ },
3125
+ {
3126
+ "epoch": 0.5250537964955426,
3127
+ "grad_norm": 0.42112411326748034,
3128
+ "learning_rate": 1.0879881892529325e-05,
3129
+ "loss": 0.1428,
3130
+ "step": 427
3131
+ },
3132
+ {
3133
+ "epoch": 0.5262834306793729,
3134
+ "grad_norm": 0.5237196748492299,
3135
+ "learning_rate": 1.0837063939103332e-05,
3136
+ "loss": 0.2371,
3137
+ "step": 428
3138
+ },
3139
+ {
3140
+ "epoch": 0.5275130648632032,
3141
+ "grad_norm": 0.45934815233301857,
3142
+ "learning_rate": 1.0794230525197959e-05,
3143
+ "loss": 0.2438,
3144
+ "step": 429
3145
+ },
3146
+ {
3147
+ "epoch": 0.5287426990470335,
3148
+ "grad_norm": 0.6638366111080263,
3149
+ "learning_rate": 1.0751382441941677e-05,
3150
+ "loss": 0.1915,
3151
+ "step": 430
3152
+ },
3153
+ {
3154
+ "epoch": 0.5299723332308638,
3155
+ "grad_norm": 0.4535025325333235,
3156
+ "learning_rate": 1.0708520480733895e-05,
3157
+ "loss": 0.194,
3158
+ "step": 431
3159
+ },
3160
+ {
3161
+ "epoch": 0.5312019674146942,
3162
+ "grad_norm": 0.5223418583566981,
3163
+ "learning_rate": 1.0665645433230345e-05,
3164
+ "loss": 0.2608,
3165
+ "step": 432
3166
+ },
3167
+ {
3168
+ "epoch": 0.5324316015985244,
3169
+ "grad_norm": 0.5251894549889435,
3170
+ "learning_rate": 1.0622758091328469e-05,
3171
+ "loss": 0.1438,
3172
+ "step": 433
3173
+ },
3174
+ {
3175
+ "epoch": 0.5336612357823548,
3176
+ "grad_norm": 0.4790588612821075,
3177
+ "learning_rate": 1.0579859247152774e-05,
3178
+ "loss": 0.1627,
3179
+ "step": 434
3180
+ },
3181
+ {
3182
+ "epoch": 0.5348908699661851,
3183
+ "grad_norm": 0.44616175922026896,
3184
+ "learning_rate": 1.0536949693040224e-05,
3185
+ "loss": 0.2082,
3186
+ "step": 435
3187
+ },
3188
+ {
3189
+ "epoch": 0.5361205041500153,
3190
+ "grad_norm": 0.46980201755709106,
3191
+ "learning_rate": 1.0494030221525582e-05,
3192
+ "loss": 0.2491,
3193
+ "step": 436
3194
+ },
3195
+ {
3196
+ "epoch": 0.5373501383338457,
3197
+ "grad_norm": 0.31327453178418485,
3198
+ "learning_rate": 1.0451101625326798e-05,
3199
+ "loss": 0.1715,
3200
+ "step": 437
3201
+ },
3202
+ {
3203
+ "epoch": 0.538579772517676,
3204
+ "grad_norm": 0.4945036398444153,
3205
+ "learning_rate": 1.0408164697330348e-05,
3206
+ "loss": 0.2311,
3207
+ "step": 438
3208
+ },
3209
+ {
3210
+ "epoch": 0.5398094067015063,
3211
+ "grad_norm": 0.49262428836934985,
3212
+ "learning_rate": 1.0365220230576592e-05,
3213
+ "loss": 0.2812,
3214
+ "step": 439
3215
+ },
3216
+ {
3217
+ "epoch": 0.5410390408853366,
3218
+ "grad_norm": 0.3787467213860296,
3219
+ "learning_rate": 1.0322269018245128e-05,
3220
+ "loss": 0.238,
3221
+ "step": 440
3222
+ },
3223
+ {
3224
+ "epoch": 0.5410390408853366,
3225
+ "eval_accuracy": 0.7914438502673797,
3226
+ "eval_f1": 0.4657534246575342,
3227
+ "eval_loss": 0.4313281178474426,
3228
+ "eval_precision": 0.7391304347826086,
3229
+ "eval_recall": 0.34,
3230
+ "eval_runtime": 22.6212,
3231
+ "eval_samples_per_second": 2.21,
3232
+ "eval_steps_per_second": 0.177,
3233
+ "step": 440
3234
+ },
3235
+ {
3236
+ "epoch": 0.5422686750691669,
3237
+ "grad_norm": 0.37936214504954746,
3238
+ "learning_rate": 1.0279311853640157e-05,
3239
+ "loss": 0.2168,
3240
+ "step": 441
3241
+ },
3242
+ {
3243
+ "epoch": 0.5434983092529972,
3244
+ "grad_norm": 0.3206038847726103,
3245
+ "learning_rate": 1.0236349530175807e-05,
3246
+ "loss": 0.1876,
3247
+ "step": 442
3248
+ },
3249
+ {
3250
+ "epoch": 0.5447279434368275,
3251
+ "grad_norm": 0.36229470962362703,
3252
+ "learning_rate": 1.019338284136149e-05,
3253
+ "loss": 0.1736,
3254
+ "step": 443
3255
+ },
3256
+ {
3257
+ "epoch": 0.5459575776206579,
3258
+ "grad_norm": 0.4075063721479331,
3259
+ "learning_rate": 1.015041258078725e-05,
3260
+ "loss": 0.2283,
3261
+ "step": 444
3262
+ },
3263
+ {
3264
+ "epoch": 0.5471872118044881,
3265
+ "grad_norm": 0.2649302419780058,
3266
+ "learning_rate": 1.0107439542109097e-05,
3267
+ "loss": 0.1342,
3268
+ "step": 445
3269
+ },
3270
+ {
3271
+ "epoch": 0.5484168459883185,
3272
+ "grad_norm": 0.4132655463506092,
3273
+ "learning_rate": 1.0064464519034358e-05,
3274
+ "loss": 0.2227,
3275
+ "step": 446
3276
+ },
3277
+ {
3278
+ "epoch": 0.5496464801721488,
3279
+ "grad_norm": 0.3696111104824441,
3280
+ "learning_rate": 1.0021488305307003e-05,
3281
+ "loss": 0.16,
3282
+ "step": 447
3283
+ },
3284
+ {
3285
+ "epoch": 0.550876114355979,
3286
+ "grad_norm": 0.33812235844778665,
3287
+ "learning_rate": 9.978511694692999e-06,
3288
+ "loss": 0.1787,
3289
+ "step": 448
3290
+ },
3291
+ {
3292
+ "epoch": 0.5521057485398094,
3293
+ "grad_norm": 0.4571938907087761,
3294
+ "learning_rate": 9.935535480965647e-06,
3295
+ "loss": 0.176,
3296
+ "step": 449
3297
+ },
3298
+ {
3299
+ "epoch": 0.5533353827236397,
3300
+ "grad_norm": 0.4297886084823541,
3301
+ "learning_rate": 9.892560457890907e-06,
3302
+ "loss": 0.1822,
3303
+ "step": 450
3304
+ },
3305
+ {
3306
+ "epoch": 0.5545650169074701,
3307
+ "grad_norm": 0.39629426107785415,
3308
+ "learning_rate": 9.849587419212751e-06,
3309
+ "loss": 0.1982,
3310
+ "step": 451
3311
+ },
3312
+ {
3313
+ "epoch": 0.5557946510913003,
3314
+ "grad_norm": 0.4188025429095887,
3315
+ "learning_rate": 9.806617158638515e-06,
3316
+ "loss": 0.263,
3317
+ "step": 452
3318
+ },
3319
+ {
3320
+ "epoch": 0.5570242852751307,
3321
+ "grad_norm": 0.5523505686855087,
3322
+ "learning_rate": 9.763650469824198e-06,
3323
+ "loss": 0.3381,
3324
+ "step": 453
3325
+ },
3326
+ {
3327
+ "epoch": 0.558253919458961,
3328
+ "grad_norm": 0.3507777011017874,
3329
+ "learning_rate": 9.720688146359843e-06,
3330
+ "loss": 0.2102,
3331
+ "step": 454
3332
+ },
3333
+ {
3334
+ "epoch": 0.5594835536427912,
3335
+ "grad_norm": 0.321426765671579,
3336
+ "learning_rate": 9.677730981754875e-06,
3337
+ "loss": 0.1714,
3338
+ "step": 455
3339
+ },
3340
+ {
3341
+ "epoch": 0.5607131878266216,
3342
+ "grad_norm": 0.41323958284891527,
3343
+ "learning_rate": 9.634779769423412e-06,
3344
+ "loss": 0.236,
3345
+ "step": 456
3346
+ },
3347
+ {
3348
+ "epoch": 0.5619428220104519,
3349
+ "grad_norm": 0.49361377740156237,
3350
+ "learning_rate": 9.591835302669657e-06,
3351
+ "loss": 0.1847,
3352
+ "step": 457
3353
+ },
3354
+ {
3355
+ "epoch": 0.5631724561942822,
3356
+ "grad_norm": 0.40610616606906547,
3357
+ "learning_rate": 9.548898374673205e-06,
3358
+ "loss": 0.2124,
3359
+ "step": 458
3360
+ },
3361
+ {
3362
+ "epoch": 0.5644020903781125,
3363
+ "grad_norm": 0.6542907937118401,
3364
+ "learning_rate": 9.505969778474418e-06,
3365
+ "loss": 0.2063,
3366
+ "step": 459
3367
+ },
3368
+ {
3369
+ "epoch": 0.5656317245619428,
3370
+ "grad_norm": 0.34192984136591886,
3371
+ "learning_rate": 9.463050306959782e-06,
3372
+ "loss": 0.1661,
3373
+ "step": 460
3374
+ },
3375
+ {
3376
+ "epoch": 0.5668613587457731,
3377
+ "grad_norm": 0.4336541383620103,
3378
+ "learning_rate": 9.42014075284723e-06,
3379
+ "loss": 0.1666,
3380
+ "step": 461
3381
+ },
3382
+ {
3383
+ "epoch": 0.5680909929296034,
3384
+ "grad_norm": 0.3660193380978002,
3385
+ "learning_rate": 9.377241908671533e-06,
3386
+ "loss": 0.189,
3387
+ "step": 462
3388
+ },
3389
+ {
3390
+ "epoch": 0.5693206271134338,
3391
+ "grad_norm": 0.41494751800096213,
3392
+ "learning_rate": 9.334354566769658e-06,
3393
+ "loss": 0.2119,
3394
+ "step": 463
3395
+ },
3396
+ {
3397
+ "epoch": 0.570550261297264,
3398
+ "grad_norm": 0.5184551716970647,
3399
+ "learning_rate": 9.291479519266108e-06,
3400
+ "loss": 0.2418,
3401
+ "step": 464
3402
+ },
3403
+ {
3404
+ "epoch": 0.5717798954810944,
3405
+ "grad_norm": 0.46046449118174804,
3406
+ "learning_rate": 9.248617558058328e-06,
3407
+ "loss": 0.1998,
3408
+ "step": 465
3409
+ },
3410
+ {
3411
+ "epoch": 0.5730095296649247,
3412
+ "grad_norm": 0.3975308500337465,
3413
+ "learning_rate": 9.205769474802045e-06,
3414
+ "loss": 0.1907,
3415
+ "step": 466
3416
+ },
3417
+ {
3418
+ "epoch": 0.574239163848755,
3419
+ "grad_norm": 0.4237299624678481,
3420
+ "learning_rate": 9.162936060896672e-06,
3421
+ "loss": 0.2087,
3422
+ "step": 467
3423
+ },
3424
+ {
3425
+ "epoch": 0.5754687980325853,
3426
+ "grad_norm": 0.47636428636745337,
3427
+ "learning_rate": 9.12011810747068e-06,
3428
+ "loss": 0.161,
3429
+ "step": 468
3430
+ },
3431
+ {
3432
+ "epoch": 0.5766984322164156,
3433
+ "grad_norm": 0.5257948156557456,
3434
+ "learning_rate": 9.07731640536698e-06,
3435
+ "loss": 0.3091,
3436
+ "step": 469
3437
+ },
3438
+ {
3439
+ "epoch": 0.577928066400246,
3440
+ "grad_norm": 0.4479489865327413,
3441
+ "learning_rate": 9.034531745128334e-06,
3442
+ "loss": 0.2383,
3443
+ "step": 470
3444
+ },
3445
+ {
3446
+ "epoch": 0.5791577005840762,
3447
+ "grad_norm": 0.40206612139181935,
3448
+ "learning_rate": 8.991764916982731e-06,
3449
+ "loss": 0.2534,
3450
+ "step": 471
3451
+ },
3452
+ {
3453
+ "epoch": 0.5803873347679065,
3454
+ "grad_norm": 0.3870155682325831,
3455
+ "learning_rate": 8.949016710828808e-06,
3456
+ "loss": 0.1743,
3457
+ "step": 472
3458
+ },
3459
+ {
3460
+ "epoch": 0.5816169689517369,
3461
+ "grad_norm": 0.35857102895539483,
3462
+ "learning_rate": 8.906287916221259e-06,
3463
+ "loss": 0.1541,
3464
+ "step": 473
3465
+ },
3466
+ {
3467
+ "epoch": 0.5828466031355671,
3468
+ "grad_norm": 0.3297878518016191,
3469
+ "learning_rate": 8.863579322356242e-06,
3470
+ "loss": 0.184,
3471
+ "step": 474
3472
+ },
3473
+ {
3474
+ "epoch": 0.5840762373193975,
3475
+ "grad_norm": 0.2983994696232135,
3476
+ "learning_rate": 8.820891718056815e-06,
3477
+ "loss": 0.1836,
3478
+ "step": 475
3479
+ },
3480
+ {
3481
+ "epoch": 0.5853058715032278,
3482
+ "grad_norm": 0.40664636398131304,
3483
+ "learning_rate": 8.77822589175836e-06,
3484
+ "loss": 0.1594,
3485
+ "step": 476
3486
+ },
3487
+ {
3488
+ "epoch": 0.5865355056870581,
3489
+ "grad_norm": 0.37741982285626524,
3490
+ "learning_rate": 8.73558263149402e-06,
3491
+ "loss": 0.236,
3492
+ "step": 477
3493
+ },
3494
+ {
3495
+ "epoch": 0.5877651398708884,
3496
+ "grad_norm": 0.4536143968466746,
3497
+ "learning_rate": 8.692962724880148e-06,
3498
+ "loss": 0.1882,
3499
+ "step": 478
3500
+ },
3501
+ {
3502
+ "epoch": 0.5889947740547187,
3503
+ "grad_norm": 0.3713741062634612,
3504
+ "learning_rate": 8.650366959101757e-06,
3505
+ "loss": 0.2143,
3506
+ "step": 479
3507
+ },
3508
+ {
3509
+ "epoch": 0.590224408238549,
3510
+ "grad_norm": 0.27112035186414296,
3511
+ "learning_rate": 8.607796120897978e-06,
3512
+ "loss": 0.1448,
3513
+ "step": 480
3514
+ },
3515
+ {
3516
+ "epoch": 0.590224408238549,
3517
+ "eval_accuracy": 0.8128342245989305,
3518
+ "eval_f1": 0.5454545454545454,
3519
+ "eval_loss": 0.41609373688697815,
3520
+ "eval_precision": 0.7777777777777778,
3521
+ "eval_recall": 0.42,
3522
+ "eval_runtime": 23.0163,
3523
+ "eval_samples_per_second": 2.172,
3524
+ "eval_steps_per_second": 0.174,
3525
+ "step": 480
3526
+ },
3527
+ {
3528
+ "epoch": 0.5914540424223793,
3529
+ "grad_norm": 0.3731696583376688,
3530
+ "learning_rate": 8.565250996547538e-06,
3531
+ "loss": 0.1521,
3532
+ "step": 481
3533
+ },
3534
+ {
3535
+ "epoch": 0.5926836766062097,
3536
+ "grad_norm": 0.3715423359630779,
3537
+ "learning_rate": 8.522732371854228e-06,
3538
+ "loss": 0.1561,
3539
+ "step": 482
3540
+ },
3541
+ {
3542
+ "epoch": 0.59391331079004,
3543
+ "grad_norm": 0.4445264123453298,
3544
+ "learning_rate": 8.480241032132394e-06,
3545
+ "loss": 0.1772,
3546
+ "step": 483
3547
+ },
3548
+ {
3549
+ "epoch": 0.5951429449738703,
3550
+ "grad_norm": 0.417726015870805,
3551
+ "learning_rate": 8.437777762192434e-06,
3552
+ "loss": 0.2099,
3553
+ "step": 484
3554
+ },
3555
+ {
3556
+ "epoch": 0.5963725791577006,
3557
+ "grad_norm": 0.5443603401932148,
3558
+ "learning_rate": 8.395343346326295e-06,
3559
+ "loss": 0.2194,
3560
+ "step": 485
3561
+ },
3562
+ {
3563
+ "epoch": 0.5976022133415309,
3564
+ "grad_norm": 0.357174859123756,
3565
+ "learning_rate": 8.352938568293e-06,
3566
+ "loss": 0.1994,
3567
+ "step": 486
3568
+ },
3569
+ {
3570
+ "epoch": 0.5988318475253612,
3571
+ "grad_norm": 0.4122807392874181,
3572
+ "learning_rate": 8.310564211304159e-06,
3573
+ "loss": 0.2326,
3574
+ "step": 487
3575
+ },
3576
+ {
3577
+ "epoch": 0.6000614817091915,
3578
+ "grad_norm": 0.3030477984709631,
3579
+ "learning_rate": 8.268221058009506e-06,
3580
+ "loss": 0.1678,
3581
+ "step": 488
3582
+ },
3583
+ {
3584
+ "epoch": 0.6012911158930219,
3585
+ "grad_norm": 0.32220394621350373,
3586
+ "learning_rate": 8.225909890482456e-06,
3587
+ "loss": 0.1442,
3588
+ "step": 489
3589
+ },
3590
+ {
3591
+ "epoch": 0.6025207500768521,
3592
+ "grad_norm": 0.4724721543903087,
3593
+ "learning_rate": 8.183631490205636e-06,
3594
+ "loss": 0.1962,
3595
+ "step": 490
3596
+ },
3597
+ {
3598
+ "epoch": 0.6037503842606824,
3599
+ "grad_norm": 0.3600147393401425,
3600
+ "learning_rate": 8.141386638056482e-06,
3601
+ "loss": 0.1876,
3602
+ "step": 491
3603
+ },
3604
+ {
3605
+ "epoch": 0.6049800184445128,
3606
+ "grad_norm": 0.4000861620375486,
3607
+ "learning_rate": 8.09917611429279e-06,
3608
+ "loss": 0.2202,
3609
+ "step": 492
3610
+ },
3611
+ {
3612
+ "epoch": 0.606209652628343,
3613
+ "grad_norm": 0.3772045360701519,
3614
+ "learning_rate": 8.057000698538311e-06,
3615
+ "loss": 0.1606,
3616
+ "step": 493
3617
+ },
3618
+ {
3619
+ "epoch": 0.6074392868121734,
3620
+ "grad_norm": 0.37207256879831996,
3621
+ "learning_rate": 8.014861169768362e-06,
3622
+ "loss": 0.2028,
3623
+ "step": 494
3624
+ },
3625
+ {
3626
+ "epoch": 0.6086689209960037,
3627
+ "grad_norm": 0.3479563238322552,
3628
+ "learning_rate": 7.972758306295436e-06,
3629
+ "loss": 0.1574,
3630
+ "step": 495
3631
+ },
3632
+ {
3633
+ "epoch": 0.609898555179834,
3634
+ "grad_norm": 0.4796467447401053,
3635
+ "learning_rate": 7.930692885754806e-06,
3636
+ "loss": 0.224,
3637
+ "step": 496
3638
+ },
3639
+ {
3640
+ "epoch": 0.6111281893636643,
3641
+ "grad_norm": 0.3477407213433725,
3642
+ "learning_rate": 7.888665685090194e-06,
3643
+ "loss": 0.2333,
3644
+ "step": 497
3645
+ },
3646
+ {
3647
+ "epoch": 0.6123578235474946,
3648
+ "grad_norm": 0.3793733407594908,
3649
+ "learning_rate": 7.846677480539392e-06,
3650
+ "loss": 0.2162,
3651
+ "step": 498
3652
+ },
3653
+ {
3654
+ "epoch": 0.613587457731325,
3655
+ "grad_norm": 0.3805341456576239,
3656
+ "learning_rate": 7.80472904761995e-06,
3657
+ "loss": 0.141,
3658
+ "step": 499
3659
+ },
3660
+ {
3661
+ "epoch": 0.6148170919151552,
3662
+ "grad_norm": 0.29991453914514893,
3663
+ "learning_rate": 7.762821161114834e-06,
3664
+ "loss": 0.1767,
3665
+ "step": 500
3666
  }
3667
  ],
3668
  "logging_steps": 1,
 
3682
  "attributes": {}
3683
  }
3684
  },
3685
+ "total_flos": 491155566985216.0,
3686
  "train_batch_size": 4,
3687
  "trial_name": null,
3688
  "trial_params": null