jaiwithani committed on
Commit
a9a8767
·
verified ·
1 Parent(s): 7f075e0

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 1,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 12,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 48,
13
+ "max_position_embeddings": 512,
14
+ "model_type": "llama",
15
+ "num_attention_heads": 2,
16
+ "num_hidden_layers": 1,
17
+ "num_key_value_heads": 1,
18
+ "pretraining_tp": 1,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
+ "rope_theta": 10000.0,
22
+ "tie_word_embeddings": false,
23
+ "torch_dtype": "float32",
24
+ "transformers_version": "4.40.0",
25
+ "use_cache": true,
26
+ "vocab_size": 4096
27
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "transformers_version": "4.40.0"
6
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b4ddc18eb61ce665e0d7f0e4366e9c81aad8a91bb0849b72bf28c82ea3218cc
3
+ size 403216
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36dacbcf230476b111fceffb7281733e54d2ae73aa1ac588e96092cdd1af7ec6
3
+ size 813674
run_context.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "device": "cuda",
3
+ "torch_version": "2.1.2+cu121",
4
+ "delphi_version": "0.1.1",
5
+ "transformers_version": "4.40.0",
6
+ "os": "#29~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Thu Apr 4 14:39:20 UTC 2"
7
+ }
training_config.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_config": {
3
+ "model_class": "LlamaForCausalLM",
4
+ "vocab_size": 4096,
5
+ "hidden_act": "silu",
6
+ "max_position_embeddings": 512,
7
+ "initializer_range": 0.02,
8
+ "rms_norm_eps": 1e-06,
9
+ "bos_token_id": 0,
10
+ "eos_token_id": 1,
11
+ "tie_word_embeddings": false,
12
+ "rope_theta": 10000.0,
13
+ "rope_scaling": null,
14
+ "attention_bias": false,
15
+ "attention_dropout": 0.0,
16
+ "hidden_size": 12,
17
+ "intermediate_size": 48,
18
+ "num_attention_heads": 2,
19
+ "num_hidden_layers": 1,
20
+ "num_key_value_heads": 1
21
+ },
22
+ "max_seq_len": 512,
23
+ "run_name": "2024_04_27_03_51_29",
24
+ "output_dir": "/home/jai/.local/share/delphi/2024_04_27_03_51_29",
25
+ "device": "auto",
26
+ "checkpoint_interval": 400,
27
+ "extra_checkpoint_iters": [
28
+ 1,
29
+ 2,
30
+ 4,
31
+ 8,
32
+ 16,
33
+ 32,
34
+ 64,
35
+ 128,
36
+ 256,
37
+ 512
38
+ ],
39
+ "log_interval": 40,
40
+ "eval_iters": 10,
41
+ "resume_from_path": null,
42
+ "batch_size": 256,
43
+ "max_epochs": 10,
44
+ "grad_clip": 1.0,
45
+ "gradient_accumulation_steps": 1,
46
+ "adam": {
47
+ "learning_rate": 0.0005,
48
+ "weight_decay": 0.1,
49
+ "beta1": 0.9,
50
+ "beta2": 0.95,
51
+ "decay_lr": true,
52
+ "warmup_iters": 1000,
53
+ "min_lr": 0.0
54
+ },
55
+ "batch_ordering_seed": 1337,
56
+ "torch_seed": 42,
57
+ "save_optimizer": true,
58
+ "dataset": {
59
+ "name": "delphi-suite/stories-tokenized",
60
+ "feature": "tokens",
61
+ "train_split": "train",
62
+ "validation_split": "validation"
63
+ },
64
+ "wandb": null,
65
+ "out_repo_id": "delphi-demo/llama-100k",
66
+ "debug_config": {
67
+ "no_training": false,
68
+ "no_eval": false
69
+ }
70
+ }
training_state.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "iter_num": 41900,
3
+ "lr": 7.375018928890143e-13,
4
+ "epoch": 9,
5
+ "step": 4189
6
+ }