bharati2324 committed on
Commit
fa0c131
·
verified ·
1 Parent(s): d6d5c8b

Training in progress, step 200, checkpoint

Browse files
checkpoint-200/adapter_config.json CHANGED
@@ -23,12 +23,12 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "o_proj",
27
- "up_proj",
28
  "k_proj",
29
- "down_proj",
30
  "v_proj",
 
31
  "q_proj",
 
32
  "gate_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
26
  "k_proj",
27
+ "up_proj",
28
  "v_proj",
29
+ "o_proj",
30
  "q_proj",
31
+ "down_proj",
32
  "gate_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
checkpoint-200/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:70b4107e558676fcca146e15ed362f16d45dc16bc2930ef5418c8b0ef5bb4b4b
3
  size 45118424
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7974e943995ae8faec3b42c0941d07ef51c97aee451ba8cd7cca7d4d1fe1f141
3
  size 45118424
checkpoint-200/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be981b5cf0bfcf1cc8ea91d86a2e12ca376e8584d449003744e156a1bbb496b7
3
  size 23159290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:35925ff0b6418c98c15d507316875bbd1b1e686d2dfcdd85612ef6cb572cb8a7
3
  size 23159290
checkpoint-200/trainer_state.json CHANGED
@@ -10,70 +10,70 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.021333333333333333,
13
- "grad_norm": 0.47001057863235474,
14
  "learning_rate": 0.00019678111587982831,
15
- "loss": 1.2773,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 0.042666666666666665,
20
- "grad_norm": 0.3469043970108032,
21
  "learning_rate": 0.0001924892703862661,
22
  "loss": 0.8918,
23
  "step": 40
24
  },
25
  {
26
  "epoch": 0.064,
27
- "grad_norm": 0.4485608637332916,
28
  "learning_rate": 0.00018819742489270387,
29
  "loss": 0.8588,
30
  "step": 60
31
  },
32
  {
33
  "epoch": 0.08533333333333333,
34
- "grad_norm": 0.4924505054950714,
35
  "learning_rate": 0.00018390557939914164,
36
  "loss": 0.7577,
37
  "step": 80
38
  },
39
  {
40
  "epoch": 0.10666666666666667,
41
- "grad_norm": 0.3011874854564667,
42
  "learning_rate": 0.00017961373390557942,
43
  "loss": 0.7796,
44
  "step": 100
45
  },
46
  {
47
  "epoch": 0.128,
48
- "grad_norm": 0.35178664326667786,
49
  "learning_rate": 0.00017532188841201717,
50
  "loss": 0.7647,
51
  "step": 120
52
  },
53
  {
54
  "epoch": 0.14933333333333335,
55
- "grad_norm": 0.29895663261413574,
56
  "learning_rate": 0.00017103004291845494,
57
- "loss": 0.6741,
58
  "step": 140
59
  },
60
  {
61
  "epoch": 0.17066666666666666,
62
- "grad_norm": 0.265635222196579,
63
  "learning_rate": 0.00016673819742489272,
64
  "loss": 0.7586,
65
  "step": 160
66
  },
67
  {
68
  "epoch": 0.192,
69
- "grad_norm": 0.29633283615112305,
70
  "learning_rate": 0.0001624463519313305,
71
  "loss": 0.7364,
72
  "step": 180
73
  },
74
  {
75
  "epoch": 0.21333333333333335,
76
- "grad_norm": 0.3618737757205963,
77
  "learning_rate": 0.00015815450643776824,
78
  "loss": 0.7943,
79
  "step": 200
 
10
  "log_history": [
11
  {
12
  "epoch": 0.021333333333333333,
13
+ "grad_norm": 0.4694526791572571,
14
  "learning_rate": 0.00019678111587982831,
15
+ "loss": 1.2772,
16
  "step": 20
17
  },
18
  {
19
  "epoch": 0.042666666666666665,
20
+ "grad_norm": 0.34691280126571655,
21
  "learning_rate": 0.0001924892703862661,
22
  "loss": 0.8918,
23
  "step": 40
24
  },
25
  {
26
  "epoch": 0.064,
27
+ "grad_norm": 0.44894590973854065,
28
  "learning_rate": 0.00018819742489270387,
29
  "loss": 0.8588,
30
  "step": 60
31
  },
32
  {
33
  "epoch": 0.08533333333333333,
34
+ "grad_norm": 0.4901750981807709,
35
  "learning_rate": 0.00018390557939914164,
36
  "loss": 0.7577,
37
  "step": 80
38
  },
39
  {
40
  "epoch": 0.10666666666666667,
41
+ "grad_norm": 0.3013491630554199,
42
  "learning_rate": 0.00017961373390557942,
43
  "loss": 0.7796,
44
  "step": 100
45
  },
46
  {
47
  "epoch": 0.128,
48
+ "grad_norm": 0.35143589973449707,
49
  "learning_rate": 0.00017532188841201717,
50
  "loss": 0.7647,
51
  "step": 120
52
  },
53
  {
54
  "epoch": 0.14933333333333335,
55
+ "grad_norm": 0.29885634779930115,
56
  "learning_rate": 0.00017103004291845494,
57
+ "loss": 0.674,
58
  "step": 140
59
  },
60
  {
61
  "epoch": 0.17066666666666666,
62
+ "grad_norm": 0.26554301381111145,
63
  "learning_rate": 0.00016673819742489272,
64
  "loss": 0.7586,
65
  "step": 160
66
  },
67
  {
68
  "epoch": 0.192,
69
+ "grad_norm": 0.2963835895061493,
70
  "learning_rate": 0.0001624463519313305,
71
  "loss": 0.7364,
72
  "step": 180
73
  },
74
  {
75
  "epoch": 0.21333333333333335,
76
+ "grad_norm": 0.36166927218437195,
77
  "learning_rate": 0.00015815450643776824,
78
  "loss": 0.7943,
79
  "step": 200
checkpoint-200/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4651d06185709dd3d181207d567d0b3ed653efb01f01015cb44b8df4eebee657
3
  size 5560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69617c1bea4a27e99b6c6729498e45489dcd01641dbfbfbf0cc0b118ad579975
3
  size 5560