AlexKolosov committed on
Commit
405ada2
·
1 Parent(s): 5b32b2a

End of training

Browse files
all_results.json CHANGED
@@ -1,7 +1,12 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.6886791284533514,
4
- "train_runtime": 13.5776,
5
- "train_samples_per_second": 5.082,
6
- "train_steps_per_second": 5.082
 
 
 
 
 
7
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "eval_accuracy": 0.8,
4
+ "eval_loss": 0.6858694553375244,
5
+ "eval_runtime": 0.1243,
6
+ "eval_samples_per_second": 40.237,
7
+ "eval_steps_per_second": 8.047,
8
+ "train_loss": 0.6942512125208757,
9
+ "train_runtime": 21.1635,
10
+ "train_samples_per_second": 3.26,
11
+ "train_steps_per_second": 3.26
12
  }
eval_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_accuracy": 0.8,
4
+ "eval_loss": 0.6858694553375244,
5
+ "eval_runtime": 0.1243,
6
+ "eval_samples_per_second": 40.237,
7
+ "eval_steps_per_second": 8.047
8
+ }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "epoch": 3.0,
3
- "train_loss": 0.6886791284533514,
4
- "train_runtime": 13.5776,
5
- "train_samples_per_second": 5.082,
6
- "train_steps_per_second": 5.082
7
  }
 
1
  {
2
  "epoch": 3.0,
3
+ "train_loss": 0.6942512125208757,
4
+ "train_runtime": 21.1635,
5
+ "train_samples_per_second": 3.26,
6
+ "train_steps_per_second": 3.26
7
  }
trainer_state.json CHANGED
@@ -7,14 +7,77 @@
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 3.0,
12
  "step": 69,
13
  "total_flos": 1465242836520960.0,
14
- "train_loss": 0.6886791284533514,
15
- "train_runtime": 13.5776,
16
- "train_samples_per_second": 5.082,
17
- "train_steps_per_second": 5.082
18
  }
19
  ],
20
  "max_steps": 69,
 
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
+ {
11
+ "epoch": 0.43,
12
+ "learning_rate": 4.27536231884058e-05,
13
+ "loss": 0.6875,
14
+ "step": 10
15
+ },
16
+ {
17
+ "epoch": 0.87,
18
+ "learning_rate": 3.5507246376811596e-05,
19
+ "loss": 0.7142,
20
+ "step": 20
21
+ },
22
+ {
23
+ "epoch": 1.0,
24
+ "eval_accuracy": 0.4,
25
+ "eval_loss": 0.689231276512146,
26
+ "eval_runtime": 0.1389,
27
+ "eval_samples_per_second": 35.988,
28
+ "eval_steps_per_second": 7.198,
29
+ "step": 23
30
+ },
31
+ {
32
+ "epoch": 1.3,
33
+ "learning_rate": 2.826086956521739e-05,
34
+ "loss": 0.6874,
35
+ "step": 30
36
+ },
37
+ {
38
+ "epoch": 1.74,
39
+ "learning_rate": 2.101449275362319e-05,
40
+ "loss": 0.6938,
41
+ "step": 40
42
+ },
43
+ {
44
+ "epoch": 2.0,
45
+ "eval_accuracy": 0.4,
46
+ "eval_loss": 0.7027438879013062,
47
+ "eval_runtime": 0.1438,
48
+ "eval_samples_per_second": 34.781,
49
+ "eval_steps_per_second": 6.956,
50
+ "step": 46
51
+ },
52
+ {
53
+ "epoch": 2.17,
54
+ "learning_rate": 1.3768115942028985e-05,
55
+ "loss": 0.6999,
56
+ "step": 50
57
+ },
58
+ {
59
+ "epoch": 2.61,
60
+ "learning_rate": 6.521739130434783e-06,
61
+ "loss": 0.6823,
62
+ "step": 60
63
+ },
64
+ {
65
+ "epoch": 3.0,
66
+ "eval_accuracy": 0.8,
67
+ "eval_loss": 0.6858694553375244,
68
+ "eval_runtime": 0.1346,
69
+ "eval_samples_per_second": 37.154,
70
+ "eval_steps_per_second": 7.431,
71
+ "step": 69
72
+ },
73
  {
74
  "epoch": 3.0,
75
  "step": 69,
76
  "total_flos": 1465242836520960.0,
77
+ "train_loss": 0.6942512125208757,
78
+ "train_runtime": 21.1635,
79
+ "train_samples_per_second": 3.26,
80
+ "train_steps_per_second": 3.26
81
  }
82
  ],
83
  "max_steps": 69,