Mel-Iza0 committed
Commit 6034a28
1 Parent(s): 5ad8564

Training in progress, step 100, checkpoint

checkpoint-100/adapter_config.json CHANGED
@@ -20,10 +20,10 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
+    "q_proj",
     "k_proj",
-    "v_proj",
-    "q_proj"
+    "o_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,
checkpoint-100/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d31aa50d03f21c55365c2e7dc36ad5fd2f5538b6a67f01a4fe4caa60de043c06
+oid sha256:bb566d484b0610d6187a6fbc6101a35d7772ec94ff1d072f6a19043b8154fc96
 size 54560368
checkpoint-100/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:af3be30a2128176cc41b81bad3e48de0cea8155454a888fd51470c651d33177b
+oid sha256:0526d3102e720416875bf7ce7c44f8adceb2bcb24599db29507f3dba1ceda178
 size 109267450
checkpoint-100/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:99cdf514d195ace594a247b30bc61c0b0ccb08b6431f56f75e4d6b9fc7392b92
+oid sha256:081169a810650fc9c95f030ebe03523d399ed7bbdce143b990084dbe080d837c
 size 1064
checkpoint-100/trainer_state.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "best_metric": 0.4733360707759857,
+  "best_metric": 0.3698837161064148,
   "best_model_checkpoint": "./mixstral/05-04-24-Weni-WeniGPT-Agents-Mixstral-Instruct-2.0.1-KTO_KTO with Agents 1.2.0 dataset and Mixstral model, with tokenization zephyr chat template-3_max_steps-145_batch_16_2024-04-05_ppid_9/checkpoint-100",
   "epoch": 0.684931506849315,
   "eval_steps": 50,
@@ -10,95 +10,95 @@
   "log_history": [
     {
       "epoch": 0.14,
-      "grad_norm": Infinity,
-      "kl": 16.759851455688477,
-      "learning_rate": 0.00018,
-      "logps/chosen": -217.2803497314453,
-      "logps/rejected": -266.78955078125,
-      "loss": 0.4135,
-      "rewards/chosen": 2.8302645683288574,
-      "rewards/margins": 1.9116979837417603,
-      "rewards/rejected": 1.01555597782135,
+      "grad_norm": 4.052237033843994,
+      "kl": 10.023146629333496,
+      "learning_rate": 0.0001785714285714286,
+      "logps/chosen": -232.72720336914062,
+      "logps/rejected": -241.88575744628906,
+      "loss": 0.4316,
+      "rewards/chosen": 1.2224007844924927,
+      "rewards/margins": 1.0828871726989746,
+      "rewards/rejected": 0.279308944940567,
       "step": 20
     },
     {
       "epoch": 0.27,
-      "grad_norm": 4.954351902008057,
-      "kl": 0.40393954515457153,
-      "learning_rate": 0.00015142857142857143,
-      "logps/chosen": -323.0667419433594,
-      "logps/rejected": -331.9349365234375,
-      "loss": 0.427,
-      "rewards/chosen": -5.341065883636475,
-      "rewards/margins": 2.56813383102417,
-      "rewards/rejected": -8.01456356048584,
+      "grad_norm": 2.734194278717041,
+      "kl": 10.954643249511719,
+      "learning_rate": 0.00015000000000000001,
+      "logps/chosen": -228.13992309570312,
+      "logps/rejected": -270.3298034667969,
+      "loss": 0.4077,
+      "rewards/chosen": 1.687286615371704,
+      "rewards/margins": 1.9949144124984741,
+      "rewards/rejected": -0.2780403196811676,
       "step": 40
     },
     {
       "epoch": 0.34,
-      "eval_kl": 0.31787964701652527,
-      "eval_logps/chosen": -276.96771240234375,
-      "eval_logps/rejected": -285.83453369140625,
-      "eval_loss": 0.4442897439002991,
-      "eval_rewards/chosen": -2.62730073928833,
-      "eval_rewards/margins": 1.0439932346343994,
-      "eval_rewards/rejected": -3.7775051593780518,
-      "eval_runtime": 357.2813,
-      "eval_samples_per_second": 0.84,
-      "eval_steps_per_second": 0.21,
+      "eval_kl": 0.0,
+      "eval_logps/chosen": -315.1575927734375,
+      "eval_logps/rejected": -328.9866027832031,
+      "eval_loss": 0.466037392616272,
+      "eval_rewards/chosen": -7.166321754455566,
+      "eval_rewards/margins": 2.8236684799194336,
+      "eval_rewards/rejected": -10.133459091186523,
+      "eval_runtime": 353.626,
+      "eval_samples_per_second": 0.848,
+      "eval_steps_per_second": 0.212,
       "step": 50
     },
     {
       "epoch": 0.41,
-      "grad_norm": 1.9756174879148602e-05,
-      "kl": 0.9693483114242554,
-      "learning_rate": 0.00012571428571428572,
-      "logps/chosen": -549.1575927734375,
-      "logps/rejected": -583.3649291992188,
-      "loss": 0.4167,
-      "rewards/chosen": -29.825927734375,
-      "rewards/margins": 3.2253201007843018,
-      "rewards/rejected": -32.24267578125,
+      "grad_norm": 1.9602097272872925,
+      "kl": 1.5916956663131714,
+      "learning_rate": 0.00012142857142857143,
+      "logps/chosen": -268.18023681640625,
+      "logps/rejected": -304.6438903808594,
+      "loss": 0.4028,
+      "rewards/chosen": -2.1896169185638428,
+      "rewards/margins": 4.930688858032227,
+      "rewards/rejected": -6.980587482452393,
       "step": 60
     },
     {
       "epoch": 0.55,
-      "grad_norm": 1.4999230870671454e-06,
-      "kl": 0.0,
-      "learning_rate": 9.714285714285715e-05,
-      "logps/chosen": -1577.5938720703125,
-      "logps/rejected": -1497.0914306640625,
-      "loss": 0.4625,
-      "rewards/chosen": -132.58737182617188,
-      "rewards/margins": -8.486028671264648,
-      "rewards/rejected": -123.95598602294922,
+      "grad_norm": 1.9362258911132812,
+      "kl": 3.6249618530273438,
+      "learning_rate": 9.285714285714286e-05,
+      "logps/chosen": -263.2818603515625,
+      "logps/rejected": -308.0414733886719,
+      "loss": 0.3873,
+      "rewards/chosen": -0.1703629493713379,
+      "rewards/margins": 5.487633228302002,
+      "rewards/rejected": -5.359496116638184,
      "step": 80
     },
     {
       "epoch": 0.68,
-      "grad_norm": 0.0,
-      "kl": 0.0,
-      "learning_rate": 6.857142857142858e-05,
-      "logps/chosen": -1709.744140625,
-      "logps/rejected": -1653.6820068359375,
-      "loss": 0.425,
-      "rewards/chosen": -144.38365173339844,
-      "rewards/margins": -5.729578971862793,
-      "rewards/rejected": -138.68692016601562,
+      "grad_norm": 2.322431802749634,
+      "kl": 0.9517351388931274,
+      "learning_rate": 6.428571428571429e-05,
+      "logps/chosen": -240.21408081054688,
+      "logps/rejected": -329.3759765625,
+      "loss": 0.3351,
+      "rewards/chosen": 0.027771174907684326,
+      "rewards/margins": 7.602840423583984,
+      "rewards/rejected": -7.7202911376953125,
       "step": 100
     },
     {
       "epoch": 0.68,
-      "eval_kl": 0.0,
-      "eval_logps/chosen": -1588.1917724609375,
-      "eval_logps/rejected": -1473.307373046875,
-      "eval_loss": 0.4733360707759857,
-      "eval_rewards/chosen": -133.74969482421875,
-      "eval_rewards/margins": -10.205015182495117,
-      "eval_rewards/rejected": -122.52478790283203,
-      "eval_runtime": 356.0942,
-      "eval_samples_per_second": 0.842,
-      "eval_steps_per_second": 0.211,
+      "eval_kl": 0.48766180872917175,
+      "eval_logps/chosen": -261.78826904296875,
+      "eval_logps/rejected": -295.7093505859375,
+      "eval_loss": 0.3698837161064148,
+      "eval_rewards/chosen": -1.8293884992599487,
+      "eval_rewards/margins": 4.808248043060303,
+      "eval_rewards/rejected": -6.805734634399414,
+      "eval_runtime": 352.0516,
+      "eval_samples_per_second": 0.852,
+      "eval_steps_per_second": 0.213,
       "step": 100
     }
   ],
checkpoint-100/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:48c1d2ff27fc85582789e23e09bfc9597b564ecdbe5db0b91b68b324bb1290f0
+oid sha256:e79b6a8f4655d052842d9d7abce8bef41fb97b532d7202e19512d995cc23d94c
 size 5752