tom813 committed on
Commit
1c480f3
1 Parent(s): ed9b1f1

one table update 6000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:40e6069a78f8988dfcbd9ce8f46655ca2c28ead825b4d88fdb576649f6ba0e23
3
  size 261189898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed411523e825514d3a68061cdafdc3807eb42d802ff4f39fa2958d1ae57fe545
3
  size 261189898
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5b37a23fdc89c1c57154a5bbb2b82bcdfebeae7b1f32913e8f5838b4365d5b94
3
  size 1044540346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96599b7ccdf5b3a524ac5736c85b602d224dd22026f48a1002f23017f67c0b9f
3
  size 1044540346
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df5abd41fec659ebbe7dd2b4caff7408e32c39ef09f30071b012af22a9ef7d04
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53de026ea02239c8920d0a8191312db2f91442b4779f916d3bb9e22f1bcba98b
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ac7de8907e79c9f47fccf3aa89da8ca3d3d851fdb56e005195640f80ea9c8917
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bd3d777aeaba0b41d9a03c6730b70ea7cfcb24e68ae7498f71468201e53ab38
3
  size 1064
tokenizer.json CHANGED
@@ -1,7 +1,19 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 256,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": "BatchLongest",
11
+ "direction": "Right",
12
+ "pad_to_multiple_of": null,
13
+ "pad_id": 11,
14
+ "pad_type_id": 0,
15
+ "pad_token": "<|endoftext|>"
16
+ },
17
  "added_tokens": [
18
  {
19
  "id": 0,
trainer_state.json CHANGED
@@ -1,76 +1,28 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.176416609314062,
5
- "global_step": 30000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.92,
12
  "learning_rate": 0.0002,
13
- "loss": 0.4138,
14
  "step": 3000
15
  },
16
  {
17
- "epoch": 1.84,
18
  "learning_rate": 0.0002,
19
- "loss": 0.298,
20
  "step": 6000
21
- },
22
- {
23
- "epoch": 2.75,
24
- "learning_rate": 0.0002,
25
- "loss": 0.2786,
26
- "step": 9000
27
- },
28
- {
29
- "epoch": 3.67,
30
- "learning_rate": 0.0002,
31
- "loss": 0.263,
32
- "step": 12000
33
- },
34
- {
35
- "epoch": 4.59,
36
- "learning_rate": 0.0002,
37
- "loss": 0.2507,
38
- "step": 15000
39
- },
40
- {
41
- "epoch": 5.51,
42
- "learning_rate": 0.0002,
43
- "loss": 0.2387,
44
- "step": 18000
45
- },
46
- {
47
- "epoch": 6.42,
48
- "learning_rate": 0.0002,
49
- "loss": 0.2279,
50
- "step": 21000
51
- },
52
- {
53
- "epoch": 7.34,
54
- "learning_rate": 0.0002,
55
- "loss": 0.2179,
56
- "step": 24000
57
- },
58
- {
59
- "epoch": 8.26,
60
- "learning_rate": 0.0002,
61
- "loss": 0.209,
62
- "step": 27000
63
- },
64
- {
65
- "epoch": 9.18,
66
- "learning_rate": 0.0002,
67
- "loss": 0.2015,
68
- "step": 30000
69
  }
70
  ],
71
  "max_steps": 30000,
72
- "num_train_epochs": 10,
73
- "total_flos": 2.5389193589849457e+18,
74
  "trial_name": null,
75
  "trial_params": null
76
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.853658536585366,
5
+ "global_step": 6000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 2.93,
12
  "learning_rate": 0.0002,
13
+ "loss": 0.3963,
14
  "step": 3000
15
  },
16
  {
17
+ "epoch": 5.85,
18
  "learning_rate": 0.0002,
19
+ "loss": 0.2937,
20
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  }
22
  ],
23
  "max_steps": 30000,
24
+ "num_train_epochs": 30,
25
+ "total_flos": 1.26953109061632e+18,
26
  "trial_name": null,
27
  "trial_params": null
28
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f5569544233e3061e5e000ebbd0e69d9723778ac08056e5ec170696f4d0e2b11
3
  size 4408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1401c5ed0ace9fbb251f5d8a19a6ffcb1a44136d06be0bdc387ea4e700c4ba5
3
  size 4408