File size: 24,500 Bytes
d9c0423
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
{
  "best_metric": 0.9306620270961232,
  "best_model_checkpoint": "models/google-canine-c-typosquat-v3.1-mnrl/checkpoint-868",
  "epoch": 1.2385861561119293,
  "eval_steps": 62,
  "global_step": 868,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09131075110456553,
      "grad_norm": 5.324620723724365,
      "learning_rate": 3e-05,
      "loss": 1.219,
      "step": 62
    },
    {
      "epoch": 0.09131075110456553,
      "eval_info_retr_eval_dot_accuracy@1": 0.9574487639003768,
      "eval_info_retr_eval_dot_accuracy@10": 0.994026284348865,
      "eval_info_retr_eval_dot_accuracy@3": 0.9816193364580461,
      "eval_info_retr_eval_dot_accuracy@5": 0.9881444720154398,
      "eval_info_retr_eval_dot_map@100": 0.9710061189570759,
      "eval_info_retr_eval_dot_mrr@10": 0.970718399481841,
      "eval_info_retr_eval_dot_ndcg@10": 0.9764345312554475,
      "eval_info_retr_eval_dot_precision@1": 0.9574487639003768,
      "eval_info_retr_eval_dot_precision@10": 0.09940262843488651,
      "eval_info_retr_eval_dot_precision@3": 0.32720644548601535,
      "eval_info_retr_eval_dot_precision@5": 0.197628894403088,
      "eval_info_retr_eval_dot_recall@1": 0.9574487639003768,
      "eval_info_retr_eval_dot_recall@10": 0.994026284348865,
      "eval_info_retr_eval_dot_recall@3": 0.9816193364580461,
      "eval_info_retr_eval_dot_recall@5": 0.9881444720154398,
      "eval_loss": 0.11100412905216217,
      "eval_para_mine_eval_average_precision": 0.7672056895535211,
      "eval_para_mine_eval_f1": 0.7847944762520686,
      "eval_para_mine_eval_precision": 0.8285314095516364,
      "eval_para_mine_eval_recall": 0.7454436228787721,
      "eval_para_mine_eval_threshold": 0.7348841428756714,
      "eval_runtime": 44.8792,
      "eval_samples_per_second": 242.451,
      "eval_sequential_score": 0.9710061189570759,
      "eval_steps_per_second": 1.916,
      "step": 62
    },
    {
      "epoch": 0.18262150220913106,
      "grad_norm": 3.005993604660034,
      "learning_rate": 4.943211151264843e-05,
      "loss": 0.1012,
      "step": 124
    },
    {
      "epoch": 0.18262150220913106,
      "eval_info_retr_eval_dot_accuracy@1": 0.9798731734215606,
      "eval_info_retr_eval_dot_accuracy@10": 0.9973348037864167,
      "eval_info_retr_eval_dot_accuracy@3": 0.9930153478540575,
      "eval_info_retr_eval_dot_accuracy@5": 0.9957724473853506,
      "eval_info_retr_eval_dot_map@100": 0.9868644025653387,
      "eval_info_retr_eval_dot_mrr@10": 0.9867332819841785,
      "eval_info_retr_eval_dot_ndcg@10": 0.989378667273856,
      "eval_info_retr_eval_dot_precision@1": 0.9798731734215606,
      "eval_info_retr_eval_dot_precision@10": 0.09973348037864167,
      "eval_info_retr_eval_dot_precision@3": 0.33100511595135246,
      "eval_info_retr_eval_dot_precision@5": 0.19915448947707012,
      "eval_info_retr_eval_dot_recall@1": 0.9798731734215606,
      "eval_info_retr_eval_dot_recall@10": 0.9973348037864167,
      "eval_info_retr_eval_dot_recall@3": 0.9930153478540575,
      "eval_info_retr_eval_dot_recall@5": 0.9957724473853506,
      "eval_loss": 0.04060104861855507,
      "eval_para_mine_eval_average_precision": 0.8585605463078829,
      "eval_para_mine_eval_f1": 0.8537043823665016,
      "eval_para_mine_eval_precision": 0.8957241803604032,
      "eval_para_mine_eval_recall": 0.8154503529921904,
      "eval_para_mine_eval_threshold": 0.6834436357021332,
      "eval_runtime": 44.147,
      "eval_samples_per_second": 246.472,
      "eval_sequential_score": 0.9868644025653387,
      "eval_steps_per_second": 1.948,
      "step": 124
    },
    {
      "epoch": 0.27393225331369664,
      "grad_norm": 0.6856785416603088,
      "learning_rate": 4.783169850283944e-05,
      "loss": 0.0693,
      "step": 186
    },
    {
      "epoch": 0.27393225331369664,
      "eval_info_retr_eval_dot_accuracy@1": 0.9855711791195663,
      "eval_info_retr_eval_dot_accuracy@10": 0.9988971601874828,
      "eval_info_retr_eval_dot_accuracy@3": 0.9959562540207701,
      "eval_info_retr_eval_dot_accuracy@5": 0.9977943203749655,
      "eval_info_retr_eval_dot_map@100": 0.9910708474937577,
      "eval_info_retr_eval_dot_mrr@10": 0.991004780431304,
      "eval_info_retr_eval_dot_ndcg@10": 0.9929831934334958,
      "eval_info_retr_eval_dot_precision@1": 0.9855711791195663,
      "eval_info_retr_eval_dot_precision@10": 0.09988971601874828,
      "eval_info_retr_eval_dot_precision@3": 0.3319854180069234,
      "eval_info_retr_eval_dot_precision@5": 0.19955886407499313,
      "eval_info_retr_eval_dot_recall@1": 0.9855711791195663,
      "eval_info_retr_eval_dot_recall@10": 0.9988971601874828,
      "eval_info_retr_eval_dot_recall@3": 0.9959562540207701,
      "eval_info_retr_eval_dot_recall@5": 0.9977943203749655,
      "eval_loss": 0.024971680715680122,
      "eval_para_mine_eval_average_precision": 0.8878504879371486,
      "eval_para_mine_eval_f1": 0.8765703942488466,
      "eval_para_mine_eval_precision": 0.9063191861895322,
      "eval_para_mine_eval_recall": 0.8487124705839841,
      "eval_para_mine_eval_threshold": 0.6699230670928955,
      "eval_runtime": 44.4383,
      "eval_samples_per_second": 244.856,
      "eval_sequential_score": 0.9910708474937577,
      "eval_steps_per_second": 1.935,
      "step": 186
    },
    {
      "epoch": 0.36524300441826213,
      "grad_norm": 0.5819792747497559,
      "learning_rate": 4.623128549303046e-05,
      "loss": 0.0424,
      "step": 248
    },
    {
      "epoch": 0.36524300441826213,
      "eval_info_retr_eval_dot_accuracy@1": 0.9887877952394082,
      "eval_info_retr_eval_dot_accuracy@10": 0.9994485800937414,
      "eval_info_retr_eval_dot_accuracy@3": 0.9978862236926753,
      "eval_info_retr_eval_dot_accuracy@5": 0.9986214502343534,
      "eval_info_retr_eval_dot_map@100": 0.9933887693878125,
      "eval_info_retr_eval_dot_mrr@10": 0.993345798632537,
      "eval_info_retr_eval_dot_ndcg@10": 0.994887605721736,
      "eval_info_retr_eval_dot_precision@1": 0.9887877952394082,
      "eval_info_retr_eval_dot_precision@10": 0.09994485800937414,
      "eval_info_retr_eval_dot_precision@3": 0.33262874123089176,
      "eval_info_retr_eval_dot_precision@5": 0.19972429004687073,
      "eval_info_retr_eval_dot_recall@1": 0.9887877952394082,
      "eval_info_retr_eval_dot_recall@10": 0.9994485800937414,
      "eval_info_retr_eval_dot_recall@3": 0.9978862236926753,
      "eval_info_retr_eval_dot_recall@5": 0.9986214502343534,
      "eval_loss": 0.01834421418607235,
      "eval_para_mine_eval_average_precision": 0.8919509440660052,
      "eval_para_mine_eval_f1": 0.8783150608010457,
      "eval_para_mine_eval_precision": 0.8953218322315484,
      "eval_para_mine_eval_recall": 0.8619423378336262,
      "eval_para_mine_eval_threshold": 0.6646433770656586,
      "eval_runtime": 44.2865,
      "eval_samples_per_second": 245.696,
      "eval_sequential_score": 0.9933887693878125,
      "eval_steps_per_second": 1.942,
      "step": 248
    },
    {
      "epoch": 0.4565537555228277,
      "grad_norm": 0.2500639259815216,
      "learning_rate": 4.463087248322148e-05,
      "loss": 0.0382,
      "step": 310
    },
    {
      "epoch": 0.4565537555228277,
      "eval_info_retr_eval_dot_accuracy@1": 0.9904420549581839,
      "eval_info_retr_eval_dot_accuracy@10": 0.9998161933645805,
      "eval_info_retr_eval_dot_accuracy@3": 0.9982538369635143,
      "eval_info_retr_eval_dot_accuracy@5": 0.998805256869773,
      "eval_info_retr_eval_dot_map@100": 0.9944206662844655,
      "eval_info_retr_eval_dot_mrr@10": 0.9944099442307328,
      "eval_info_retr_eval_dot_ndcg@10": 0.9957667469329782,
      "eval_info_retr_eval_dot_precision@1": 0.9904420549581839,
      "eval_info_retr_eval_dot_precision@10": 0.09998161933645805,
      "eval_info_retr_eval_dot_precision@3": 0.3327512789878381,
      "eval_info_retr_eval_dot_precision@5": 0.19976105137395464,
      "eval_info_retr_eval_dot_recall@1": 0.9904420549581839,
      "eval_info_retr_eval_dot_recall@10": 0.9998161933645805,
      "eval_info_retr_eval_dot_recall@3": 0.9982538369635143,
      "eval_info_retr_eval_dot_recall@5": 0.998805256869773,
      "eval_loss": 0.01802203245460987,
      "eval_para_mine_eval_average_precision": 0.8868720697863423,
      "eval_para_mine_eval_f1": 0.8749901834425858,
      "eval_para_mine_eval_precision": 0.8953481226571621,
      "eval_para_mine_eval_recall": 0.8555374379497094,
      "eval_para_mine_eval_threshold": 0.6635497808456421,
      "eval_runtime": 44.1381,
      "eval_samples_per_second": 246.521,
      "eval_sequential_score": 0.9944206662844655,
      "eval_steps_per_second": 1.948,
      "step": 310
    },
    {
      "epoch": 0.5478645066273933,
      "grad_norm": 0.8567762970924377,
      "learning_rate": 4.3030459473412496e-05,
      "loss": 0.0475,
      "step": 372
    },
    {
      "epoch": 0.5478645066273933,
      "eval_info_retr_eval_dot_accuracy@1": 0.9909934748644426,
      "eval_info_retr_eval_dot_accuracy@10": 0.9995404834114512,
      "eval_info_retr_eval_dot_accuracy@3": 0.9974267071041265,
      "eval_info_retr_eval_dot_accuracy@5": 0.9978862236926753,
      "eval_info_retr_eval_dot_map@100": 0.9943151219822102,
      "eval_info_retr_eval_dot_mrr@10": 0.9942822642643431,
      "eval_info_retr_eval_dot_ndcg@10": 0.9955792346109411,
      "eval_info_retr_eval_dot_precision@1": 0.9909934748644426,
      "eval_info_retr_eval_dot_precision@10": 0.09995404834114513,
      "eval_info_retr_eval_dot_precision@3": 0.33247556903470876,
      "eval_info_retr_eval_dot_precision@5": 0.19957724473853508,
      "eval_info_retr_eval_dot_recall@1": 0.9909934748644426,
      "eval_info_retr_eval_dot_recall@10": 0.9995404834114512,
      "eval_info_retr_eval_dot_recall@3": 0.9974267071041265,
      "eval_info_retr_eval_dot_recall@5": 0.9978862236926753,
      "eval_loss": 0.020289968699216843,
      "eval_para_mine_eval_average_precision": 0.9057199011076745,
      "eval_para_mine_eval_f1": 0.8932047816062182,
      "eval_para_mine_eval_precision": 0.9151715052081852,
      "eval_para_mine_eval_recall": 0.8722678675477544,
      "eval_para_mine_eval_threshold": 0.6722444593906403,
      "eval_runtime": 44.2598,
      "eval_samples_per_second": 245.844,
      "eval_sequential_score": 0.9943151219822102,
      "eval_steps_per_second": 1.943,
      "step": 372
    },
    {
      "epoch": 0.6391752577319587,
      "grad_norm": 1.4318528175354004,
      "learning_rate": 4.1430046463603515e-05,
      "loss": 0.0377,
      "step": 434
    },
    {
      "epoch": 0.6391752577319587,
      "eval_info_retr_eval_dot_accuracy@1": 0.9912691848175719,
      "eval_info_retr_eval_dot_accuracy@10": 0.9997242900468707,
      "eval_info_retr_eval_dot_accuracy@3": 0.998805256869773,
      "eval_info_retr_eval_dot_accuracy@5": 0.9995404834114512,
      "eval_info_retr_eval_dot_map@100": 0.9950291896321672,
      "eval_info_retr_eval_dot_mrr@10": 0.9950066958131474,
      "eval_info_retr_eval_dot_ndcg@10": 0.996211640167477,
      "eval_info_retr_eval_dot_precision@1": 0.9912691848175719,
      "eval_info_retr_eval_dot_precision@10": 0.09997242900468709,
      "eval_info_retr_eval_dot_precision@3": 0.33293508562325763,
      "eval_info_retr_eval_dot_precision@5": 0.1999080966822902,
      "eval_info_retr_eval_dot_recall@1": 0.9912691848175719,
      "eval_info_retr_eval_dot_recall@10": 0.9997242900468707,
      "eval_info_retr_eval_dot_recall@3": 0.998805256869773,
      "eval_info_retr_eval_dot_recall@5": 0.9995404834114512,
      "eval_loss": 0.015328879468142986,
      "eval_para_mine_eval_average_precision": 0.8941081769196199,
      "eval_para_mine_eval_f1": 0.8799847506361558,
      "eval_para_mine_eval_precision": 0.8951106127502512,
      "eval_para_mine_eval_recall": 0.8653615967984535,
      "eval_para_mine_eval_threshold": 0.6661731898784637,
      "eval_runtime": 44.1048,
      "eval_samples_per_second": 246.708,
      "eval_sequential_score": 0.9950291896321672,
      "eval_steps_per_second": 1.95,
      "step": 434
    },
    {
      "epoch": 0.7304860088365243,
      "grad_norm": 0.5837889909744263,
      "learning_rate": 3.9829633453794526e-05,
      "loss": 0.0241,
      "step": 496
    },
    {
      "epoch": 0.7304860088365243,
      "eval_info_retr_eval_dot_accuracy@1": 0.9905339582758937,
      "eval_info_retr_eval_dot_accuracy@10": 0.9998161933645805,
      "eval_info_retr_eval_dot_accuracy@3": 0.9981619336458046,
      "eval_info_retr_eval_dot_accuracy@5": 0.9993566767760316,
      "eval_info_retr_eval_dot_map@100": 0.9944845062676603,
      "eval_info_retr_eval_dot_mrr@10": 0.994473656570431,
      "eval_info_retr_eval_dot_ndcg@10": 0.9958242906817244,
      "eval_info_retr_eval_dot_precision@1": 0.9905339582758937,
      "eval_info_retr_eval_dot_precision@10": 0.09998161933645805,
      "eval_info_retr_eval_dot_precision@3": 0.3327206445486015,
      "eval_info_retr_eval_dot_precision@5": 0.19987133535520635,
      "eval_info_retr_eval_dot_recall@1": 0.9905339582758937,
      "eval_info_retr_eval_dot_recall@10": 0.9998161933645805,
      "eval_info_retr_eval_dot_recall@3": 0.9981619336458046,
      "eval_info_retr_eval_dot_recall@5": 0.9993566767760316,
      "eval_loss": 0.016888538375496864,
      "eval_para_mine_eval_average_precision": 0.8920135475248205,
      "eval_para_mine_eval_f1": 0.8817566331198535,
      "eval_para_mine_eval_precision": 0.8931684329053274,
      "eval_para_mine_eval_recall": 0.870632766166952,
      "eval_para_mine_eval_threshold": 0.6506930291652679,
      "eval_runtime": 43.9952,
      "eval_samples_per_second": 247.322,
      "eval_sequential_score": 0.9944845062676603,
      "eval_steps_per_second": 1.955,
      "step": 496
    },
    {
      "epoch": 0.8217967599410898,
      "grad_norm": 0.7147839069366455,
      "learning_rate": 3.8229220443985544e-05,
      "loss": 0.045,
      "step": 558
    },
    {
      "epoch": 0.8217967599410898,
      "eval_info_retr_eval_dot_accuracy@1": 0.9938424777134455,
      "eval_info_retr_eval_dot_accuracy@10": 0.9998161933645805,
      "eval_info_retr_eval_dot_accuracy@3": 0.9986214502343534,
      "eval_info_retr_eval_dot_accuracy@5": 0.9993566767760316,
      "eval_info_retr_eval_dot_map@100": 0.9963993172223115,
      "eval_info_retr_eval_dot_mrr@10": 0.9963845891265246,
      "eval_info_retr_eval_dot_ndcg@10": 0.9972497507624225,
      "eval_info_retr_eval_dot_precision@1": 0.9938424777134455,
      "eval_info_retr_eval_dot_precision@10": 0.09998161933645805,
      "eval_info_retr_eval_dot_precision@3": 0.33287381674478445,
      "eval_info_retr_eval_dot_precision@5": 0.19987133535520635,
      "eval_info_retr_eval_dot_recall@1": 0.9938424777134455,
      "eval_info_retr_eval_dot_recall@10": 0.9998161933645805,
      "eval_info_retr_eval_dot_recall@3": 0.9986214502343534,
      "eval_info_retr_eval_dot_recall@5": 0.9993566767760316,
      "eval_loss": 0.013228075578808784,
      "eval_para_mine_eval_average_precision": 0.9111664137442287,
      "eval_para_mine_eval_f1": 0.8942790038822603,
      "eval_para_mine_eval_precision": 0.9093417624592629,
      "eval_para_mine_eval_recall": 0.8797071271449414,
      "eval_para_mine_eval_threshold": 0.6545680165290833,
      "eval_runtime": 44.1589,
      "eval_samples_per_second": 246.405,
      "eval_sequential_score": 0.9963993172223115,
      "eval_steps_per_second": 1.948,
      "step": 558
    },
    {
      "epoch": 0.9131075110456554,
      "grad_norm": 1.6569366455078125,
      "learning_rate": 3.662880743417656e-05,
      "loss": 0.0302,
      "step": 620
    },
    {
      "epoch": 0.9131075110456554,
      "eval_info_retr_eval_dot_accuracy@1": 0.9913610881352817,
      "eval_info_retr_eval_dot_accuracy@10": 0.9992647734583219,
      "eval_info_retr_eval_dot_accuracy@3": 0.9980700303280948,
      "eval_info_retr_eval_dot_accuracy@5": 0.9987133535520633,
      "eval_info_retr_eval_dot_map@100": 0.9947594505678611,
      "eval_info_retr_eval_dot_mrr@10": 0.9947092135264177,
      "eval_info_retr_eval_dot_ndcg@10": 0.9958603290565278,
      "eval_info_retr_eval_dot_precision@1": 0.9913610881352817,
      "eval_info_retr_eval_dot_precision@10": 0.0999264773458322,
      "eval_info_retr_eval_dot_precision@3": 0.33269001010936494,
      "eval_info_retr_eval_dot_precision@5": 0.19974267071041266,
      "eval_info_retr_eval_dot_recall@1": 0.9913610881352817,
      "eval_info_retr_eval_dot_recall@10": 0.9992647734583219,
      "eval_info_retr_eval_dot_recall@3": 0.9980700303280948,
      "eval_info_retr_eval_dot_recall@5": 0.9987133535520633,
      "eval_loss": 0.01584717258810997,
      "eval_para_mine_eval_average_precision": 0.9161096169142061,
      "eval_para_mine_eval_f1": 0.9015840220385676,
      "eval_para_mine_eval_precision": 0.9167067369413698,
      "eval_para_mine_eval_recall": 0.886952161992475,
      "eval_para_mine_eval_threshold": 0.6570625901222229,
      "eval_runtime": 44.3069,
      "eval_samples_per_second": 245.583,
      "eval_sequential_score": 0.9947594505678611,
      "eval_steps_per_second": 1.941,
      "step": 620
    },
    {
      "epoch": 1.004418262150221,
      "grad_norm": 0.12535937130451202,
      "learning_rate": 3.502839442436758e-05,
      "loss": 0.0314,
      "step": 682
    },
    {
      "epoch": 1.004418262150221,
      "eval_info_retr_eval_dot_accuracy@1": 0.9902582483227644,
      "eval_info_retr_eval_dot_accuracy@10": 0.9998161933645805,
      "eval_info_retr_eval_dot_accuracy@3": 0.9983457402812241,
      "eval_info_retr_eval_dot_accuracy@5": 0.9994485800937414,
      "eval_info_retr_eval_dot_map@100": 0.9944195539625648,
      "eval_info_retr_eval_dot_mrr@10": 0.9944061513953989,
      "eval_info_retr_eval_dot_ndcg@10": 0.9957806542116834,
      "eval_info_retr_eval_dot_precision@1": 0.9902582483227644,
      "eval_info_retr_eval_dot_precision@10": 0.09998161933645805,
      "eval_info_retr_eval_dot_precision@3": 0.3327819134270747,
      "eval_info_retr_eval_dot_precision@5": 0.19988971601874833,
      "eval_info_retr_eval_dot_recall@1": 0.9902582483227644,
      "eval_info_retr_eval_dot_recall@10": 0.9998161933645805,
      "eval_info_retr_eval_dot_recall@3": 0.9983457402812241,
      "eval_info_retr_eval_dot_recall@5": 0.9994485800937414,
      "eval_loss": 0.01480414904654026,
      "eval_para_mine_eval_average_precision": 0.9018706893922749,
      "eval_para_mine_eval_f1": 0.8877219600165349,
      "eval_para_mine_eval_precision": 0.8927995468022605,
      "eval_para_mine_eval_recall": 0.8827018017733171,
      "eval_para_mine_eval_threshold": 0.6474231779575348,
      "eval_runtime": 43.8665,
      "eval_samples_per_second": 248.048,
      "eval_sequential_score": 0.9944195539625648,
      "eval_steps_per_second": 1.96,
      "step": 682
    },
    {
      "epoch": 1.0559646539027983,
      "grad_norm": 0.2109777331352234,
      "learning_rate": 3.342798141455859e-05,
      "loss": 0.0204,
      "step": 744
    },
    {
      "epoch": 1.0559646539027983,
      "eval_info_retr_eval_dot_accuracy@1": 0.9939343810311553,
      "eval_info_retr_eval_dot_accuracy@10": 1.0,
      "eval_info_retr_eval_dot_accuracy@3": 0.9992647734583219,
      "eval_info_retr_eval_dot_accuracy@5": 0.9999080966822902,
      "eval_info_retr_eval_dot_map@100": 0.996594216217872,
      "eval_info_retr_eval_dot_mrr@10": 0.9965942162178723,
      "eval_info_retr_eval_dot_ndcg@10": 0.9974631239499567,
      "eval_info_retr_eval_dot_precision@1": 0.9939343810311553,
      "eval_info_retr_eval_dot_precision@10": 0.10000000000000002,
      "eval_info_retr_eval_dot_precision@3": 0.33308825781944057,
      "eval_info_retr_eval_dot_precision@5": 0.19998161933645806,
      "eval_info_retr_eval_dot_recall@1": 0.9939343810311553,
      "eval_info_retr_eval_dot_recall@10": 1.0,
      "eval_info_retr_eval_dot_recall@3": 0.9992647734583219,
      "eval_info_retr_eval_dot_recall@5": 0.9999080966822902,
      "eval_loss": 0.011477422900497913,
      "eval_para_mine_eval_average_precision": 0.9183526341973961,
      "eval_para_mine_eval_f1": 0.9008710021688987,
      "eval_para_mine_eval_precision": 0.9157536675501139,
      "eval_para_mine_eval_recall": 0.8864643416910201,
      "eval_para_mine_eval_threshold": 0.6480826139450073,
      "eval_runtime": 45.0423,
      "eval_samples_per_second": 241.573,
      "eval_sequential_score": 0.996594216217872,
      "eval_steps_per_second": 1.909,
      "step": 744
    },
    {
      "epoch": 1.1472754050073637,
      "grad_norm": 0.43087926506996155,
      "learning_rate": 3.182756840474961e-05,
      "loss": 0.0193,
      "step": 806
    },
    {
      "epoch": 1.1472754050073637,
      "eval_info_retr_eval_dot_accuracy@1": 0.9939343810311553,
      "eval_info_retr_eval_dot_accuracy@10": 1.0,
      "eval_info_retr_eval_dot_accuracy@3": 0.9991728701406121,
      "eval_info_retr_eval_dot_accuracy@5": 0.9998161933645805,
      "eval_info_retr_eval_dot_map@100": 0.996541371810189,
      "eval_info_retr_eval_dot_mrr@10": 0.996541371810189,
      "eval_info_retr_eval_dot_ndcg@10": 0.9974193893555866,
      "eval_info_retr_eval_dot_precision@1": 0.9939343810311553,
      "eval_info_retr_eval_dot_precision@10": 0.10000000000000002,
      "eval_info_retr_eval_dot_precision@3": 0.333057623380204,
      "eval_info_retr_eval_dot_precision@5": 0.1999632386729161,
      "eval_info_retr_eval_dot_recall@1": 0.9939343810311553,
      "eval_info_retr_eval_dot_recall@10": 1.0,
      "eval_info_retr_eval_dot_recall@3": 0.9991728701406121,
      "eval_info_retr_eval_dot_recall@5": 0.9998161933645805,
      "eval_loss": 0.01137059461325407,
      "eval_para_mine_eval_average_precision": 0.9152930281344098,
      "eval_para_mine_eval_f1": 0.8996903771768937,
      "eval_para_mine_eval_precision": 0.9043090584686712,
      "eval_para_mine_eval_recall": 0.895118635187201,
      "eval_para_mine_eval_threshold": 0.6495572626590729,
      "eval_runtime": 43.8875,
      "eval_samples_per_second": 247.93,
      "eval_sequential_score": 0.996541371810189,
      "eval_steps_per_second": 1.96,
      "step": 806
    },
    {
      "epoch": 1.2385861561119293,
      "grad_norm": 1.3790454864501953,
      "learning_rate": 3.0227155394940632e-05,
      "loss": 0.0164,
      "step": 868
    },
    {
      "epoch": 1.2385861561119293,
      "eval_info_retr_eval_dot_accuracy@1": 0.9937505743957357,
      "eval_info_retr_eval_dot_accuracy@10": 1.0,
      "eval_info_retr_eval_dot_accuracy@3": 0.9990809668229023,
      "eval_info_retr_eval_dot_accuracy@5": 0.9999080966822902,
      "eval_info_retr_eval_dot_map@100": 0.9964954201513342,
      "eval_info_retr_eval_dot_mrr@10": 0.9964954201513342,
      "eval_info_retr_eval_dot_ndcg@10": 0.9973903161826975,
      "eval_info_retr_eval_dot_precision@1": 0.9937505743957357,
      "eval_info_retr_eval_dot_precision@10": 0.10000000000000002,
      "eval_info_retr_eval_dot_precision@3": 0.3330269889409674,
      "eval_info_retr_eval_dot_precision@5": 0.19998161933645806,
      "eval_info_retr_eval_dot_recall@1": 0.9937505743957357,
      "eval_info_retr_eval_dot_recall@10": 1.0,
      "eval_info_retr_eval_dot_recall@3": 0.9990809668229023,
      "eval_info_retr_eval_dot_recall@5": 0.9999080966822902,
      "eval_loss": 0.009216207079589367,
      "eval_para_mine_eval_average_precision": 0.9306620270961232,
      "eval_para_mine_eval_f1": 0.9113359207458677,
      "eval_para_mine_eval_precision": 0.9197094729482332,
      "eval_para_mine_eval_recall": 0.9031134679054893,
      "eval_para_mine_eval_threshold": 0.6408547163009644,
      "eval_runtime": 44.7336,
      "eval_samples_per_second": 243.24,
      "eval_sequential_score": 0.9964954201513342,
      "eval_steps_per_second": 1.922,
      "step": 868
    }
  ],
  "logging_steps": 62,
  "max_steps": 2037,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 62,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}