RodrigoSalazar-U committed on
Commit
f9ec960
·
verified ·
1 Parent(s): ee61721

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:119ed194d1366f613f4ff5a50dc5adf9d1419dc19a0e5fdbe52a95ed44c8baaf
3
  size 4785762744
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f49d74a21c9de4957df85ff3d5da7ab77e561aa8097ff2aa90937236812ddf5a
3
  size 4785762744
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b2d06d763f64e7867366113993b930392c5a1c1cf151c1a52e08d2baf51b2ad
3
  size 3497859804
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78bc5d7f250a29484cb11644e9c6bdb27e7569607b8207b8f5e90054bdb57733
3
  size 3497859804
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9b2d59715331ee8c33fa0d2ed55a484e9c2ac88210adb8737eec56220eee082c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f10c1e950ccc3ca6ce28644d055ede3dfe6763dec3f800e5ed80a1dc9e119762
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2738225629791895,
5
  "eval_steps": 500,
6
- "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -42,6 +42,41 @@
42
  "learning_rate": 9.124087591240877e-05,
43
  "loss": 1.6711,
44
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  }
46
  ],
47
  "logging_steps": 100,
@@ -61,7 +96,7 @@
61
  "attributes": {}
62
  }
63
  },
64
- "total_flos": 2.4419524014322483e+17,
65
  "train_batch_size": 16,
66
  "trial_name": null,
67
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.547645125958379,
5
  "eval_steps": 500,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
42
  "learning_rate": 9.124087591240877e-05,
43
  "loss": 1.6711,
44
  "step": 500
45
+ },
46
+ {
47
+ "epoch": 0.32858707557502737,
48
+ "grad_norm": 2.219165802001953,
49
+ "learning_rate": 9.997255186358079e-05,
50
+ "loss": 1.6651,
51
+ "step": 600
52
+ },
53
+ {
54
+ "epoch": 0.3833515881708653,
55
+ "grad_norm": 2.6104276180267334,
56
+ "learning_rate": 9.976563458663239e-05,
57
+ "loss": 1.5924,
58
+ "step": 700
59
+ },
60
+ {
61
+ "epoch": 0.43811610076670315,
62
+ "grad_norm": 2.4602837562561035,
63
+ "learning_rate": 9.93567000457336e-05,
64
+ "loss": 1.5787,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 0.4928806133625411,
69
+ "grad_norm": 2.33683705329895,
70
+ "learning_rate": 9.874740825864108e-05,
71
+ "loss": 1.5269,
72
+ "step": 900
73
+ },
74
+ {
75
+ "epoch": 0.547645125958379,
76
+ "grad_norm": 3.0263447761535645,
77
+ "learning_rate": 9.794023256786919e-05,
78
+ "loss": 1.5251,
79
+ "step": 1000
80
  }
81
  ],
82
  "logging_steps": 100,
 
96
  "attributes": {}
97
  }
98
  },
99
+ "total_flos": 4.845362695616594e+17,
100
  "train_batch_size": 16,
101
  "trial_name": null,
102
  "trial_params": null