File size: 2,454 Bytes
96a88b5
1779fb2
 
 
96a88b5
 
 
 
 
 
 
1779fb2
 
 
 
96a88b5
 
 
1779fb2
 
 
 
96a88b5
 
 
1779fb2
 
 
 
96a88b5
 
 
1779fb2
 
 
 
96a88b5
 
 
1779fb2
 
 
 
 
 
96a88b5
 
 
1779fb2
 
 
 
96a88b5
 
 
1779fb2
 
 
 
96a88b5
 
 
1779fb2
 
 
 
96a88b5
 
 
1779fb2
 
 
 
96a88b5
 
 
1779fb2
 
 
 
 
 
96a88b5
 
 
 
1779fb2
 
 
96a88b5
1779fb2
 
96a88b5
 
1779fb2
 
 
 
96a88b5
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
{
  "best_metric": 0.4691032179514943,
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-2/checkpoint-4276",
  "epoch": 2.0,
  "eval_steps": 500,
  "global_step": 4276,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.23,
      "grad_norm": 4.28505277633667,
      "learning_rate": 3.0702893894484785e-06,
      "loss": 0.6069,
      "step": 500
    },
    {
      "epoch": 0.47,
      "grad_norm": 9.482794761657715,
      "learning_rate": 2.9196373094951675e-06,
      "loss": 0.5628,
      "step": 1000
    },
    {
      "epoch": 0.7,
      "grad_norm": 22.521339416503906,
      "learning_rate": 2.7689852295418565e-06,
      "loss": 0.5565,
      "step": 1500
    },
    {
      "epoch": 0.94,
      "grad_norm": 26.7753849029541,
      "learning_rate": 2.6183331495885454e-06,
      "loss": 0.5184,
      "step": 2000
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.5730993747711182,
      "eval_matthews_correlation": 0.3853198145814999,
      "eval_runtime": 0.7612,
      "eval_samples_per_second": 1370.225,
      "eval_steps_per_second": 86.706,
      "step": 2138
    },
    {
      "epoch": 1.17,
      "grad_norm": 17.77669334411621,
      "learning_rate": 2.4676810696352344e-06,
      "loss": 0.4619,
      "step": 2500
    },
    {
      "epoch": 1.4,
      "grad_norm": 37.4239387512207,
      "learning_rate": 2.3170289896819234e-06,
      "loss": 0.5014,
      "step": 3000
    },
    {
      "epoch": 1.64,
      "grad_norm": 46.75569534301758,
      "learning_rate": 2.1663769097286124e-06,
      "loss": 0.492,
      "step": 3500
    },
    {
      "epoch": 1.87,
      "grad_norm": 66.9134750366211,
      "learning_rate": 2.0157248297753013e-06,
      "loss": 0.4809,
      "step": 4000
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.6646500825881958,
      "eval_matthews_correlation": 0.4691032179514943,
      "eval_runtime": 0.8224,
      "eval_samples_per_second": 1268.193,
      "eval_steps_per_second": 80.25,
      "step": 4276
    }
  ],
  "logging_steps": 500,
  "max_steps": 10690,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 65200091402940.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": {
    "learning_rate": 3.2209414694017896e-06,
    "num_train_epochs": 5,
    "per_device_train_batch_size": 4,
    "seed": 16
  }
}