AlexKolosov commited on
Commit
23f4342
·
1 Parent(s): f16fa72

End of training

Browse files
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.8,
4
- "eval_loss": 0.6858694553375244,
5
- "eval_runtime": 0.1243,
6
- "eval_samples_per_second": 40.237,
7
- "eval_steps_per_second": 8.047,
8
- "train_loss": 0.6942512125208757,
9
- "train_runtime": 21.1635,
10
- "train_samples_per_second": 3.26,
11
- "train_steps_per_second": 3.26
12
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.6,
4
+ "eval_loss": 0.6853448748588562,
5
+ "eval_runtime": 0.1365,
6
+ "eval_samples_per_second": 36.618,
7
+ "eval_steps_per_second": 7.324,
8
+ "train_loss": 0.6992312652477319,
9
+ "train_runtime": 27.1417,
10
+ "train_samples_per_second": 2.542,
11
+ "train_steps_per_second": 2.542
12
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 3.0,
3
- "eval_accuracy": 0.8,
4
- "eval_loss": 0.6858694553375244,
5
- "eval_runtime": 0.1243,
6
- "eval_samples_per_second": 40.237,
7
- "eval_steps_per_second": 8.047
8
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.6,
4
+ "eval_loss": 0.6853448748588562,
5
+ "eval_runtime": 0.1365,
6
+ "eval_samples_per_second": 36.618,
7
+ "eval_steps_per_second": 7.324
8
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.6942512125208757,
4
- "train_runtime": 21.1635,
5
- "train_samples_per_second": 3.26,
6
- "train_steps_per_second": 3.26
7
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.6992312652477319,
4
+ "train_runtime": 27.1417,
5
+ "train_samples_per_second": 2.542,
6
+ "train_steps_per_second": 2.542
7
  }
trainer_state.json CHANGED
@@ -10,74 +10,74 @@
10
  {
11
  "epoch": 0.43,
12
  "learning_rate": 4.27536231884058e-05,
13
- "loss": 0.6875,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.87,
18
  "learning_rate": 3.5507246376811596e-05,
19
- "loss": 0.7142,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 1.0,
24
- "eval_accuracy": 0.4,
25
- "eval_loss": 0.689231276512146,
26
- "eval_runtime": 0.1389,
27
- "eval_samples_per_second": 35.988,
28
- "eval_steps_per_second": 7.198,
29
  "step": 23
30
  },
31
  {
32
  "epoch": 1.3,
33
  "learning_rate": 2.826086956521739e-05,
34
- "loss": 0.6874,
35
  "step": 30
36
  },
37
  {
38
  "epoch": 1.74,
39
  "learning_rate": 2.101449275362319e-05,
40
- "loss": 0.6938,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 2.0,
45
- "eval_accuracy": 0.4,
46
- "eval_loss": 0.7027438879013062,
47
- "eval_runtime": 0.1438,
48
- "eval_samples_per_second": 34.781,
49
- "eval_steps_per_second": 6.956,
50
  "step": 46
51
  },
52
  {
53
  "epoch": 2.17,
54
  "learning_rate": 1.3768115942028985e-05,
55
- "loss": 0.6999,
56
  "step": 50
57
  },
58
  {
59
  "epoch": 2.61,
60
  "learning_rate": 6.521739130434783e-06,
61
- "loss": 0.6823,
62
  "step": 60
63
  },
64
  {
65
  "epoch": 3.0,
66
- "eval_accuracy": 0.8,
67
- "eval_loss": 0.6858694553375244,
68
- "eval_runtime": 0.1346,
69
- "eval_samples_per_second": 37.154,
70
- "eval_steps_per_second": 7.431,
71
  "step": 69
72
  },
73
  {
74
  "epoch": 3.0,
75
  "step": 69,
76
  "total_flos": 1465242836520960.0,
77
- "train_loss": 0.6942512125208757,
78
- "train_runtime": 21.1635,
79
- "train_samples_per_second": 3.26,
80
- "train_steps_per_second": 3.26
81
  }
82
  ],
83
  "max_steps": 69,
 
10
  {
11
  "epoch": 0.43,
12
  "learning_rate": 4.27536231884058e-05,
13
+ "loss": 0.7204,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.87,
18
  "learning_rate": 3.5507246376811596e-05,
19
+ "loss": 0.6918,
20
  "step": 20
21
  },
22
  {
23
  "epoch": 1.0,
24
+ "eval_accuracy": 0.8,
25
+ "eval_loss": 0.6894806623458862,
26
+ "eval_runtime": 0.1526,
27
+ "eval_samples_per_second": 32.764,
28
+ "eval_steps_per_second": 6.553,
29
  "step": 23
30
  },
31
  {
32
  "epoch": 1.3,
33
  "learning_rate": 2.826086956521739e-05,
34
+ "loss": 0.6986,
35
  "step": 30
36
  },
37
  {
38
  "epoch": 1.74,
39
  "learning_rate": 2.101449275362319e-05,
40
+ "loss": 0.7019,
41
  "step": 40
42
  },
43
  {
44
  "epoch": 2.0,
45
+ "eval_accuracy": 0.6,
46
+ "eval_loss": 0.6858540177345276,
47
+ "eval_runtime": 0.1355,
48
+ "eval_samples_per_second": 36.891,
49
+ "eval_steps_per_second": 7.378,
50
  "step": 46
51
  },
52
  {
53
  "epoch": 2.17,
54
  "learning_rate": 1.3768115942028985e-05,
55
+ "loss": 0.6927,
56
  "step": 50
57
  },
58
  {
59
  "epoch": 2.61,
60
  "learning_rate": 6.521739130434783e-06,
61
+ "loss": 0.69,
62
  "step": 60
63
  },
64
  {
65
  "epoch": 3.0,
66
+ "eval_accuracy": 0.6,
67
+ "eval_loss": 0.6853448748588562,
68
+ "eval_runtime": 0.1356,
69
+ "eval_samples_per_second": 36.871,
70
+ "eval_steps_per_second": 7.374,
71
  "step": 69
72
  },
73
  {
74
  "epoch": 3.0,
75
  "step": 69,
76
  "total_flos": 1465242836520960.0,
77
+ "train_loss": 0.6992312652477319,
78
+ "train_runtime": 27.1417,
79
+ "train_samples_per_second": 2.542,
80
+ "train_steps_per_second": 2.542
81
  }
82
  ],
83
  "max_steps": 69,