saqidr commited on
Commit
b5ec692
·
verified ·
1 Parent(s): 172ffc0

Training in progress, step 500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:232eaf6924b640670e424f121b0308a7c44eba675a1105736c179b9e656b4fc8
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c523e1752fda46381be0344a31afc01362450b48bbf8705a2c2bdc36cbf5d0d2
3
  size 268290900
run-13/checkpoint-2500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ecd7bb18dc112a2febe521e6b29f5a5b9eec145154d2663152f38803378acdc7
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cf50cbd134a3066e0d74687b39b2bba0fc2a0cdb34238e44082bdf70216c841
3
  size 268290900
run-13/checkpoint-2500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35b446bce357ee567754d20950f94c6d79ce4513715384de5924fb4b00b7966d
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fac8f5ce1a1a762a78650e0ad4773833a79e3d301a367a8ab899a2adfbd9404
3
  size 536643898
run-13/checkpoint-2500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d116f7be47ce88249a782ce7aafaf5b673a05bfa0ebdf9f14ab4c327c94c6b04
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7c943a77651a184c6653dd4706c14c9a27d52136e81942a2b361c74acb9f665
3
  size 1064
run-13/checkpoint-2500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f62c7b1cdbc5f04f78a333fc7ad899d41a314edecf47ae9022088c79c529b2e
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:825ce3dae72879e90f1e7c8bf9db5978e39cf9b4e9cf4d01acbdb15f1075ac03
3
  size 5176
run-15/checkpoint-1000/trainer_state.json CHANGED
@@ -12,9 +12,9 @@
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.62,
14
  "eval_loss": 0.249518021941185,
15
- "eval_runtime": 1.3697,
16
- "eval_samples_per_second": 2263.294,
17
- "eval_steps_per_second": 47.456,
18
  "step": 318
19
  },
20
  {
@@ -28,18 +28,18 @@
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8422580645161291,
30
  "eval_loss": 0.11037396639585495,
31
- "eval_runtime": 1.3637,
32
- "eval_samples_per_second": 2273.273,
33
- "eval_steps_per_second": 47.665,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8883870967741936,
39
  "eval_loss": 0.0677555724978447,
40
- "eval_runtime": 1.3669,
41
- "eval_samples_per_second": 2267.902,
42
- "eval_steps_per_second": 47.553,
43
  "step": 954
44
  },
45
  {
@@ -67,11 +67,11 @@
67
  "attributes": {}
68
  }
69
  },
70
- "total_flos": 319943755193412.0,
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
- "alpha": 0.9970515290608379,
75
  "num_train_epochs": 9,
76
  "temperature": 4
77
  }
 
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.62,
14
  "eval_loss": 0.249518021941185,
15
+ "eval_runtime": 1.2519,
16
+ "eval_samples_per_second": 2476.275,
17
+ "eval_steps_per_second": 51.922,
18
  "step": 318
19
  },
20
  {
 
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8422580645161291,
30
  "eval_loss": 0.11037396639585495,
31
+ "eval_runtime": 1.2525,
32
+ "eval_samples_per_second": 2475.052,
33
+ "eval_steps_per_second": 51.896,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8883870967741936,
39
  "eval_loss": 0.0677555724978447,
40
+ "eval_runtime": 1.2545,
41
+ "eval_samples_per_second": 2471.149,
42
+ "eval_steps_per_second": 51.814,
43
  "step": 954
44
  },
45
  {
 
67
  "attributes": {}
68
  }
69
  },
70
+ "total_flos": 343945671947388.0,
71
  "train_batch_size": 48,
72
  "trial_name": null,
73
  "trial_params": {
74
+ "alpha": 0.9969607810126598,
75
  "num_train_epochs": 9,
76
  "temperature": 4
77
  }
run-15/checkpoint-1500/trainer_state.json CHANGED
@@ -12,9 +12,9 @@
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.62,
14
  "eval_loss": 0.249518021941185,
15
- "eval_runtime": 1.3697,
16
- "eval_samples_per_second": 2263.294,
17
- "eval_steps_per_second": 47.456,
18
  "step": 318
19
  },
20
  {
@@ -28,18 +28,18 @@
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8422580645161291,
30
  "eval_loss": 0.11037396639585495,
31
- "eval_runtime": 1.3637,
32
- "eval_samples_per_second": 2273.273,
33
- "eval_steps_per_second": 47.665,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8883870967741936,
39
  "eval_loss": 0.0677555724978447,
40
- "eval_runtime": 1.3669,
41
- "eval_samples_per_second": 2267.902,
42
- "eval_steps_per_second": 47.553,
43
  "step": 954
44
  },
45
  {
@@ -53,9 +53,9 @@
53
  "epoch": 4.0,
54
  "eval_accuracy": 0.902258064516129,
55
  "eval_loss": 0.04977014288306236,
56
- "eval_runtime": 1.3624,
57
- "eval_samples_per_second": 2275.356,
58
- "eval_steps_per_second": 47.709,
59
  "step": 1272
60
  },
61
  {
@@ -83,11 +83,11 @@
83
  "attributes": {}
84
  }
85
  },
86
- "total_flos": 450371359983132.0,
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
- "alpha": 0.9970515290608379,
91
  "num_train_epochs": 9,
92
  "temperature": 4
93
  }
 
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.62,
14
  "eval_loss": 0.249518021941185,
15
+ "eval_runtime": 1.2519,
16
+ "eval_samples_per_second": 2476.275,
17
+ "eval_steps_per_second": 51.922,
18
  "step": 318
19
  },
20
  {
 
28
  "epoch": 2.0,
29
  "eval_accuracy": 0.8422580645161291,
30
  "eval_loss": 0.11037396639585495,
31
+ "eval_runtime": 1.2525,
32
+ "eval_samples_per_second": 2475.052,
33
+ "eval_steps_per_second": 51.896,
34
  "step": 636
35
  },
36
  {
37
  "epoch": 3.0,
38
  "eval_accuracy": 0.8883870967741936,
39
  "eval_loss": 0.0677555724978447,
40
+ "eval_runtime": 1.2545,
41
+ "eval_samples_per_second": 2471.149,
42
+ "eval_steps_per_second": 51.814,
43
  "step": 954
44
  },
45
  {
 
53
  "epoch": 4.0,
54
  "eval_accuracy": 0.902258064516129,
55
  "eval_loss": 0.04977014288306236,
56
+ "eval_runtime": 1.2559,
57
+ "eval_samples_per_second": 2468.428,
58
+ "eval_steps_per_second": 51.757,
59
  "step": 1272
60
  },
61
  {
 
83
  "attributes": {}
84
  }
85
  },
86
+ "total_flos": 474373276737108.0,
87
  "train_batch_size": 48,
88
  "trial_name": null,
89
  "trial_params": {
90
+ "alpha": 0.9969607810126598,
91
  "num_train_epochs": 9,
92
  "temperature": 4
93
  }
run-15/checkpoint-500/trainer_state.json CHANGED
@@ -12,9 +12,9 @@
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.62,
14
  "eval_loss": 0.249518021941185,
15
- "eval_runtime": 1.3697,
16
- "eval_samples_per_second": 2263.294,
17
- "eval_steps_per_second": 47.456,
18
  "step": 318
19
  },
20
  {
@@ -42,11 +42,11 @@
42
  "attributes": {}
43
  }
44
  },
45
- "total_flos": 189074629692144.0,
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
- "alpha": 0.9970515290608379,
50
  "num_train_epochs": 9,
51
  "temperature": 4
52
  }
 
12
  "epoch": 1.0,
13
  "eval_accuracy": 0.62,
14
  "eval_loss": 0.249518021941185,
15
+ "eval_runtime": 1.2519,
16
+ "eval_samples_per_second": 2476.275,
17
+ "eval_steps_per_second": 51.922,
18
  "step": 318
19
  },
20
  {
 
42
  "attributes": {}
43
  }
44
  },
45
+ "total_flos": 213076546446120.0,
46
  "train_batch_size": 48,
47
  "trial_name": null,
48
  "trial_params": {
49
+ "alpha": 0.9969607810126598,
50
  "num_train_epochs": 9,
51
  "temperature": 4
52
  }
run-15/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:920ec89dc47e401d4efb361d0fd82b37959f7734e045af2043ed258600a2854b
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2e73103b9db907e09d098899dc83e3dbf1869ac8fe958d6aec7beb5cf0db0d
3
  size 5176
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:825ce3dae72879e90f1e7c8bf9db5978e39cf9b4e9cf4d01acbdb15f1075ac03
3
  size 5176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b2e73103b9db907e09d098899dc83e3dbf1869ac8fe958d6aec7beb5cf0db0d
3
  size 5176