iamnguyen committed on
Commit
9d2e01a
·
verified ·
1 Parent(s): 00618b7

Training in progress, step 12, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:425cdb8f526e670631680f9ba49af2b403eb6046620ed6f981443bd12fe48aaa
3
  size 2964338224
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21a2f62efd0d71d0d134266e515dda3b88038f99e0b24c6a836737401f2339a3
3
  size 2964338224
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57a6657239ae296db292588667b26f492809b84e60b1581a4a132e38af548e6d
3
  size 1485440604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9bb165d055492bacaa7fc7f6a4b7aa35f27f884ae2282741026ab19d8357fdb
3
  size 1485440604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d79a1aac25962f14e4f652b048b5c2224eafccb90ae4f3d833b67a4a6590cbac
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c200f8fdcde78c415e4babf1b6f200bbf13323757b5b248ace34314bfbe3fd44
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.45960502692998206,
5
  "eval_steps": 500,
6
- "global_step": 8,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -63,6 +63,34 @@
63
  "learning_rate": 4e-05,
64
  "loss": 2.2126,
65
  "step": 8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  }
67
  ],
68
  "logging_steps": 1,
@@ -82,7 +110,7 @@
82
  "attributes": {}
83
  }
84
  },
85
- "total_flos": 5313812434722816.0,
86
  "train_batch_size": 2,
87
  "trial_name": null,
88
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6894075403949731,
5
  "eval_steps": 500,
6
+ "global_step": 12,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
63
  "learning_rate": 4e-05,
64
  "loss": 2.2126,
65
  "step": 8
66
+ },
67
+ {
68
+ "epoch": 0.5170556552962298,
69
+ "grad_norm": 2.069558620452881,
70
+ "learning_rate": 4.5e-05,
71
+ "loss": 2.2389,
72
+ "step": 9
73
+ },
74
+ {
75
+ "epoch": 0.5745062836624776,
76
+ "grad_norm": 1.7339693307876587,
77
+ "learning_rate": 5e-05,
78
+ "loss": 2.1964,
79
+ "step": 10
80
+ },
81
+ {
82
+ "epoch": 0.6319569120287253,
83
+ "grad_norm": 1.6608643531799316,
84
+ "learning_rate": 4.992664502959351e-05,
85
+ "loss": 2.2079,
86
+ "step": 11
87
+ },
88
+ {
89
+ "epoch": 0.6894075403949731,
90
+ "grad_norm": 1.642751932144165,
91
+ "learning_rate": 4.970701059450872e-05,
92
+ "loss": 2.2359,
93
+ "step": 12
94
  }
95
  ],
96
  "logging_steps": 1,
 
110
  "attributes": {}
111
  }
112
  },
113
+ "total_flos": 8132581322686464.0,
114
  "train_batch_size": 2,
115
  "trial_name": null,
116
  "trial_params": null