gsmyrnis commited on
Commit
7f7cc0a
·
verified ·
1 Parent(s): 3450ead

Training in progress, epoch 1

Browse files
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f402c47e6c1f03a0044ac4c8bf4b573c42a66d14ee0077e264f38987c036024
3
  size 4976698672
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fca272e7a0612b270d4b1ba3c1e28a3ff15cfa27f448106f26b1ffa55fb9af9
3
  size 4976698672
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6951f9329aeeb4210125bd35583e9b4ccf79b72499adcce105145c51c240717f
3
  size 4999802720
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12a436756d939703344dd074fc1528d05882d97ebab5d912efab6c4510bffdb8
3
  size 4999802720
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b91f9826e9e2d8b2a8297811defac0f8d8b9f82997b89e024590c7776da62a32
3
  size 4915916176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ea610580c61afd1591a021e4df55bf06dae53fcb7bf3f2647bf9d1338a4bba2
3
  size 4915916176
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb04ba1abafb82b71ac583ae14448155a181a7ef835098a3802243043d395a2a
3
  size 1168138808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eddf6c99052ce3f283bb3027ac785653b8ccc077d4c3c5d274f1fde40401bd93
3
  size 1168138808
trainer_log.jsonl CHANGED
@@ -1,25 +1,25 @@
1
- {"current_steps": 10, "total_steps": 216, "loss": 1.0335, "lr": 5e-06, "epoch": 0.1388888888888889, "percentage": 4.63, "elapsed_time": "0:01:01", "remaining_time": "0:21:07"}
2
- {"current_steps": 20, "total_steps": 216, "loss": 0.9306, "lr": 5e-06, "epoch": 0.2777777777777778, "percentage": 9.26, "elapsed_time": "0:02:01", "remaining_time": "0:19:51"}
3
- {"current_steps": 30, "total_steps": 216, "loss": 0.8997, "lr": 5e-06, "epoch": 0.4166666666666667, "percentage": 13.89, "elapsed_time": "0:03:02", "remaining_time": "0:18:49"}
4
- {"current_steps": 40, "total_steps": 216, "loss": 0.8769, "lr": 5e-06, "epoch": 0.5555555555555556, "percentage": 18.52, "elapsed_time": "0:04:03", "remaining_time": "0:17:52"}
5
- {"current_steps": 50, "total_steps": 216, "loss": 0.8675, "lr": 5e-06, "epoch": 0.6944444444444444, "percentage": 23.15, "elapsed_time": "0:05:04", "remaining_time": "0:16:50"}
6
- {"current_steps": 60, "total_steps": 216, "loss": 0.8448, "lr": 5e-06, "epoch": 0.8333333333333334, "percentage": 27.78, "elapsed_time": "0:06:05", "remaining_time": "0:15:49"}
7
- {"current_steps": 70, "total_steps": 216, "loss": 0.8431, "lr": 5e-06, "epoch": 0.9722222222222222, "percentage": 32.41, "elapsed_time": "0:07:06", "remaining_time": "0:14:49"}
8
- {"current_steps": 72, "total_steps": 216, "eval_loss": 0.8423281311988831, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:07:26", "remaining_time": "0:14:52"}
9
- {"current_steps": 80, "total_steps": 216, "loss": 0.8048, "lr": 5e-06, "epoch": 1.1111111111111112, "percentage": 37.04, "elapsed_time": "0:09:40", "remaining_time": "0:16:26"}
10
- {"current_steps": 90, "total_steps": 216, "loss": 0.7944, "lr": 5e-06, "epoch": 1.25, "percentage": 41.67, "elapsed_time": "0:10:41", "remaining_time": "0:14:58"}
11
- {"current_steps": 100, "total_steps": 216, "loss": 0.7938, "lr": 5e-06, "epoch": 1.3888888888888888, "percentage": 46.3, "elapsed_time": "0:11:42", "remaining_time": "0:13:34"}
12
- {"current_steps": 110, "total_steps": 216, "loss": 0.7881, "lr": 5e-06, "epoch": 1.5277777777777777, "percentage": 50.93, "elapsed_time": "0:12:43", "remaining_time": "0:12:15"}
13
- {"current_steps": 120, "total_steps": 216, "loss": 0.788, "lr": 5e-06, "epoch": 1.6666666666666665, "percentage": 55.56, "elapsed_time": "0:13:44", "remaining_time": "0:10:59"}
14
- {"current_steps": 130, "total_steps": 216, "loss": 0.7834, "lr": 5e-06, "epoch": 1.8055555555555556, "percentage": 60.19, "elapsed_time": "0:14:45", "remaining_time": "0:09:45"}
15
- {"current_steps": 140, "total_steps": 216, "loss": 0.7879, "lr": 5e-06, "epoch": 1.9444444444444444, "percentage": 64.81, "elapsed_time": "0:15:47", "remaining_time": "0:08:34"}
16
- {"current_steps": 144, "total_steps": 216, "eval_loss": 0.8222501277923584, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:16:18", "remaining_time": "0:08:09"}
17
- {"current_steps": 150, "total_steps": 216, "loss": 0.7562, "lr": 5e-06, "epoch": 2.0833333333333335, "percentage": 69.44, "elapsed_time": "0:18:24", "remaining_time": "0:08:06"}
18
- {"current_steps": 160, "total_steps": 216, "loss": 0.7378, "lr": 5e-06, "epoch": 2.2222222222222223, "percentage": 74.07, "elapsed_time": "0:19:24", "remaining_time": "0:06:47"}
19
- {"current_steps": 170, "total_steps": 216, "loss": 0.735, "lr": 5e-06, "epoch": 2.361111111111111, "percentage": 78.7, "elapsed_time": "0:20:26", "remaining_time": "0:05:31"}
20
- {"current_steps": 180, "total_steps": 216, "loss": 0.7372, "lr": 5e-06, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "0:21:27", "remaining_time": "0:04:17"}
21
- {"current_steps": 190, "total_steps": 216, "loss": 0.7373, "lr": 5e-06, "epoch": 2.638888888888889, "percentage": 87.96, "elapsed_time": "0:22:28", "remaining_time": "0:03:04"}
22
- {"current_steps": 200, "total_steps": 216, "loss": 0.7381, "lr": 5e-06, "epoch": 2.7777777777777777, "percentage": 92.59, "elapsed_time": "0:23:29", "remaining_time": "0:01:52"}
23
- {"current_steps": 210, "total_steps": 216, "loss": 0.7411, "lr": 5e-06, "epoch": 2.9166666666666665, "percentage": 97.22, "elapsed_time": "0:24:30", "remaining_time": "0:00:42"}
24
- {"current_steps": 216, "total_steps": 216, "eval_loss": 0.8201099634170532, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:26:40", "remaining_time": "0:00:00"}
25
- {"current_steps": 216, "total_steps": 216, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:28:16", "remaining_time": "0:00:00"}
 
1
+ {"current_steps": 10, "total_steps": 216, "loss": 1.0393, "lr": 5e-06, "epoch": 0.1388888888888889, "percentage": 4.63, "elapsed_time": "0:00:59", "remaining_time": "0:20:22"}
2
+ {"current_steps": 20, "total_steps": 216, "loss": 0.9323, "lr": 5e-06, "epoch": 0.2777777777777778, "percentage": 9.26, "elapsed_time": "0:01:57", "remaining_time": "0:19:09"}
3
+ {"current_steps": 30, "total_steps": 216, "loss": 0.8884, "lr": 5e-06, "epoch": 0.4166666666666667, "percentage": 13.89, "elapsed_time": "0:02:55", "remaining_time": "0:18:07"}
4
+ {"current_steps": 40, "total_steps": 216, "loss": 0.8695, "lr": 5e-06, "epoch": 0.5555555555555556, "percentage": 18.52, "elapsed_time": "0:03:53", "remaining_time": "0:17:06"}
5
+ {"current_steps": 50, "total_steps": 216, "loss": 0.8623, "lr": 5e-06, "epoch": 0.6944444444444444, "percentage": 23.15, "elapsed_time": "0:04:51", "remaining_time": "0:16:07"}
6
+ {"current_steps": 60, "total_steps": 216, "loss": 0.8411, "lr": 5e-06, "epoch": 0.8333333333333334, "percentage": 27.78, "elapsed_time": "0:05:49", "remaining_time": "0:15:08"}
7
+ {"current_steps": 70, "total_steps": 216, "loss": 0.8407, "lr": 5e-06, "epoch": 0.9722222222222222, "percentage": 32.41, "elapsed_time": "0:06:47", "remaining_time": "0:14:10"}
8
+ {"current_steps": 72, "total_steps": 216, "eval_loss": 0.8403190970420837, "epoch": 1.0, "percentage": 33.33, "elapsed_time": "0:07:06", "remaining_time": "0:14:12"}
9
+ {"current_steps": 80, "total_steps": 216, "loss": 0.802, "lr": 5e-06, "epoch": 1.1111111111111112, "percentage": 37.04, "elapsed_time": "0:09:20", "remaining_time": "0:15:52"}
10
+ {"current_steps": 90, "total_steps": 216, "loss": 0.7909, "lr": 5e-06, "epoch": 1.25, "percentage": 41.67, "elapsed_time": "0:10:18", "remaining_time": "0:14:25"}
11
+ {"current_steps": 100, "total_steps": 216, "loss": 0.7892, "lr": 5e-06, "epoch": 1.3888888888888888, "percentage": 46.3, "elapsed_time": "0:11:16", "remaining_time": "0:13:04"}
12
+ {"current_steps": 110, "total_steps": 216, "loss": 0.7861, "lr": 5e-06, "epoch": 1.5277777777777777, "percentage": 50.93, "elapsed_time": "0:12:14", "remaining_time": "0:11:47"}
13
+ {"current_steps": 120, "total_steps": 216, "loss": 0.7858, "lr": 5e-06, "epoch": 1.6666666666666665, "percentage": 55.56, "elapsed_time": "0:13:12", "remaining_time": "0:10:34"}
14
+ {"current_steps": 130, "total_steps": 216, "loss": 0.7817, "lr": 5e-06, "epoch": 1.8055555555555556, "percentage": 60.19, "elapsed_time": "0:14:10", "remaining_time": "0:09:22"}
15
+ {"current_steps": 140, "total_steps": 216, "loss": 0.7864, "lr": 5e-06, "epoch": 1.9444444444444444, "percentage": 64.81, "elapsed_time": "0:15:08", "remaining_time": "0:08:13"}
16
+ {"current_steps": 144, "total_steps": 216, "eval_loss": 0.8217753171920776, "epoch": 2.0, "percentage": 66.67, "elapsed_time": "0:15:38", "remaining_time": "0:07:49"}
17
+ {"current_steps": 150, "total_steps": 216, "loss": 0.7544, "lr": 5e-06, "epoch": 2.0833333333333335, "percentage": 69.44, "elapsed_time": "0:17:40", "remaining_time": "0:07:46"}
18
+ {"current_steps": 160, "total_steps": 216, "loss": 0.7362, "lr": 5e-06, "epoch": 2.2222222222222223, "percentage": 74.07, "elapsed_time": "0:18:38", "remaining_time": "0:06:31"}
19
+ {"current_steps": 170, "total_steps": 216, "loss": 0.7332, "lr": 5e-06, "epoch": 2.361111111111111, "percentage": 78.7, "elapsed_time": "0:19:36", "remaining_time": "0:05:18"}
20
+ {"current_steps": 180, "total_steps": 216, "loss": 0.7358, "lr": 5e-06, "epoch": 2.5, "percentage": 83.33, "elapsed_time": "0:20:34", "remaining_time": "0:04:06"}
21
+ {"current_steps": 190, "total_steps": 216, "loss": 0.7357, "lr": 5e-06, "epoch": 2.638888888888889, "percentage": 87.96, "elapsed_time": "0:21:33", "remaining_time": "0:02:56"}
22
+ {"current_steps": 200, "total_steps": 216, "loss": 0.7366, "lr": 5e-06, "epoch": 2.7777777777777777, "percentage": 92.59, "elapsed_time": "0:22:31", "remaining_time": "0:01:48"}
23
+ {"current_steps": 210, "total_steps": 216, "loss": 0.7397, "lr": 5e-06, "epoch": 2.9166666666666665, "percentage": 97.22, "elapsed_time": "0:23:29", "remaining_time": "0:00:40"}
24
+ {"current_steps": 216, "total_steps": 216, "eval_loss": 0.8195396065711975, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:25:41", "remaining_time": "0:00:00"}
25
+ {"current_steps": 216, "total_steps": 216, "epoch": 3.0, "percentage": 100.0, "elapsed_time": "0:27:06", "remaining_time": "0:00:00"}
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a5884821f54204385a48899fcc4d4eae481c642b782371206cfb0b59c80517d
3
  size 7288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ce99df18f055dea5d6e1d8f2416d387828f144a15d80bacb8c0a07c1202a202
3
  size 7288