rs545837 committed on
Commit
ba6997c
·
verified ·
1 Parent(s): c16711e

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +135 -0
trainer_state.json ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 2.87882399559021,
3
+ "best_model_checkpoint": "./results/checkpoint-1000",
4
+ "epoch": 0.48455481526347666,
5
+ "eval_steps": 250,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.04845548152634767,
13
+ "grad_norm": 0.2021484375,
14
+ "learning_rate": 0.0019926230341909047,
15
+ "loss": 5.5548,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.09691096305269534,
20
+ "grad_norm": 0.484375,
21
+ "learning_rate": 0.0019623193935821215,
22
+ "loss": 4.0801,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.12113870381586916,
27
+ "eval_loss": 3.651378631591797,
28
+ "eval_runtime": 1225.4443,
29
+ "eval_samples_per_second": 47.902,
30
+ "eval_steps_per_second": 1.497,
31
+ "step": 250
32
+ },
33
+ {
34
+ "epoch": 0.145366444579043,
35
+ "grad_norm": 0.6796875,
36
+ "learning_rate": 0.0019092830690827923,
37
+ "loss": 3.6511,
38
+ "step": 300
39
+ },
40
+ {
41
+ "epoch": 0.19382192610539067,
42
+ "grad_norm": 0.474609375,
43
+ "learning_rate": 0.0018347669274724923,
44
+ "loss": 3.4945,
45
+ "step": 400
46
+ },
47
+ {
48
+ "epoch": 0.24227740763173833,
49
+ "grad_norm": 0.51171875,
50
+ "learning_rate": 0.0017405312490731885,
51
+ "loss": 3.3662,
52
+ "step": 500
53
+ },
54
+ {
55
+ "epoch": 0.24227740763173833,
56
+ "eval_loss": 3.2901480197906494,
57
+ "eval_runtime": 1223.1792,
58
+ "eval_samples_per_second": 47.991,
59
+ "eval_steps_per_second": 1.5,
60
+ "step": 500
61
+ },
62
+ {
63
+ "epoch": 0.290732889158086,
64
+ "grad_norm": 0.90234375,
65
+ "learning_rate": 0.0016288021449895208,
66
+ "loss": 3.2248,
67
+ "step": 600
68
+ },
69
+ {
70
+ "epoch": 0.3391883706844337,
71
+ "grad_norm": 0.5859375,
72
+ "learning_rate": 0.001502218970119089,
73
+ "loss": 3.1488,
74
+ "step": 700
75
+ },
76
+ {
77
+ "epoch": 0.3634161114476075,
78
+ "eval_loss": 3.054584503173828,
79
+ "eval_runtime": 1225.6755,
80
+ "eval_samples_per_second": 47.893,
81
+ "eval_steps_per_second": 1.497,
82
+ "step": 750
83
+ },
84
+ {
85
+ "epoch": 0.38764385221078135,
86
+ "grad_norm": 0.5546875,
87
+ "learning_rate": 0.001363771974184993,
88
+ "loss": 3.0595,
89
+ "step": 800
90
+ },
91
+ {
92
+ "epoch": 0.436099333737129,
93
+ "grad_norm": 0.498046875,
94
+ "learning_rate": 0.001216731663648138,
95
+ "loss": 2.9756,
96
+ "step": 900
97
+ },
98
+ {
99
+ "epoch": 0.48455481526347666,
100
+ "grad_norm": 0.423828125,
101
+ "learning_rate": 0.0010645715431691336,
102
+ "loss": 2.9173,
103
+ "step": 1000
104
+ },
105
+ {
106
+ "epoch": 0.48455481526347666,
107
+ "eval_loss": 2.87882399559021,
108
+ "eval_runtime": 1223.6,
109
+ "eval_samples_per_second": 47.974,
110
+ "eval_steps_per_second": 1.5,
111
+ "step": 1000
112
+ }
113
+ ],
114
+ "logging_steps": 100,
115
+ "max_steps": 2063,
116
+ "num_input_tokens_seen": 0,
117
+ "num_train_epochs": 1,
118
+ "save_steps": 250,
119
+ "stateful_callbacks": {
120
+ "TrainerControl": {
121
+ "args": {
122
+ "should_epoch_stop": false,
123
+ "should_evaluate": false,
124
+ "should_log": false,
125
+ "should_save": true,
126
+ "should_training_stop": false
127
+ },
128
+ "attributes": {}
129
+ }
130
+ },
131
+ "total_flos": 2.339294739955712e+17,
132
+ "train_batch_size": 4,
133
+ "trial_name": null,
134
+ "trial_params": null
135
+ }