mgh6 committed on
Commit
a6bdbbe
·
verified ·
1 Parent(s): 1d5596c

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "tattabio/gLM2_650M",
3
+ "architectures": [
4
+ "gLM2ForMaskedLM"
5
+ ],
6
+ "auto_map": {
7
+ "AutoConfig": "configuration_glm2.gLM2Config",
8
+ "AutoModel": "modeling_glm2.gLM2Model",
9
+ "AutoModelForMaskedLM": "modeling_glm2.gLM2ForMaskedLM"
10
+ },
11
+ "depth": 33,
12
+ "dim": 1280,
13
+ "ffn_dim_multiplier": null,
14
+ "heads": 20,
15
+ "model_type": "gLM2",
16
+ "norm_eps": 1e-05,
17
+ "swiglu_multiple_of": 256,
18
+ "torch_dtype": "float32",
19
+ "transformers_version": "4.45.2",
20
+ "vocab_size": 37
21
+ }
last-checkpoint/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1b14f81830b72149ea0d8c4100992492b70830c0be8b1cf197598950492d5dbd
3
+ size 2682482800
last-checkpoint/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f55c73a5198d71456c1007c1e2b3c9a0187abff0edd38f53f302c9bc1425c1a9
3
+ size 5365108834
last-checkpoint/rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e59ba1bafb88d66ae26193100490c9a785a11de83c469025dd06b157587f9a12
3
+ size 15024
last-checkpoint/rng_state_1.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4265d5553bfb55a8aabc39f0360eb599499fd97cae117d1fd7b125b5d6a87d7a
3
+ size 15024
last-checkpoint/rng_state_2.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2338b1fc8e1dc5158561fa405d6ffdd4c3a51a1e4efc57735695a177e000c2ac
3
+ size 15024
last-checkpoint/rng_state_3.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d2a43d2cfd5f337ec43b74fcfb4a4040703c5eddc8ac6f4f043ce24274fe35f
3
+ size 15024
last-checkpoint/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e82975d48b3a94464d8ca425e4fcde1c6037da4312b3dd05fa799b2e3c703a0d
3
+ size 1064
last-checkpoint/trainer_state.json ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.007675232145791034,
5
+ "eval_steps": 100,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.003837616072895517,
13
+ "grad_norm": 4.467618942260742,
14
+ "learning_rate": 0.0009961621123733496,
15
+ "loss": 1.752,
16
+ "step": 50
17
+ },
18
+ {
19
+ "epoch": 0.007675232145791034,
20
+ "grad_norm": 1.9758433103561401,
21
+ "learning_rate": 0.0009923242247466995,
22
+ "loss": 1.2354,
23
+ "step": 100
24
+ }
25
+ ],
26
+ "logging_steps": 50,
27
+ "max_steps": 13028,
28
+ "num_input_tokens_seen": 0,
29
+ "num_train_epochs": 1,
30
+ "save_steps": 100,
31
+ "stateful_callbacks": {
32
+ "TrainerControl": {
33
+ "args": {
34
+ "should_epoch_stop": false,
35
+ "should_evaluate": false,
36
+ "should_log": false,
37
+ "should_save": true,
38
+ "should_training_stop": false
39
+ },
40
+ "attributes": {}
41
+ }
42
+ },
43
+ "total_flos": 1.712602232001659e+17,
44
+ "train_batch_size": 2,
45
+ "trial_name": null,
46
+ "trial_params": null
47
+ }
last-checkpoint/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa210eeb1786e732e3ecfb64143949504cb9722bc191a603cef1c219287b0f6f
3
+ size 5240