gate369 committed on
Commit
2015c27
·
verified ·
1 Parent(s): 0a0ad1e

Training in progress, step 1000

Browse files
Files changed (39) hide show
  1. config.json +1 -1
  2. final_model/config.json +26 -0
  3. final_model/generation_config.json +6 -0
  4. final_model/model.safetensors +3 -0
  5. final_model/special_tokens_map.json +30 -0
  6. final_model/tokenizer.json +0 -0
  7. final_model/tokenizer.model +3 -0
  8. final_model/tokenizer_config.json +42 -0
  9. final_model/training_args.bin +3 -0
  10. model.safetensors +1 -1
  11. runs/Jun24_02-19-21_9476b84f7cc3/events.out.tfevents.1719195601.9476b84f7cc3.891.0 +3 -0
  12. runs/Jun24_02-22-03_9476b84f7cc3/events.out.tfevents.1719195730.9476b84f7cc3.3389.0 +3 -0
  13. runs/Jun24_02-31-50_9476b84f7cc3/events.out.tfevents.1719196316.9476b84f7cc3.3389.1 +3 -0
  14. runs/Jun24_03-01-05_9476b84f7cc3/events.out.tfevents.1719198072.9476b84f7cc3.3389.2 +3 -0
  15. runs/Jun24_03-04-45_9476b84f7cc3/events.out.tfevents.1719198291.9476b84f7cc3.14166.0 +3 -0
  16. runs/Jun24_03-05-11_9476b84f7cc3/events.out.tfevents.1719198320.9476b84f7cc3.14166.1 +3 -0
  17. runs/Jun24_07-18-11_9476b84f7cc3/events.out.tfevents.1719213497.9476b84f7cc3.77049.0 +3 -0
  18. runs/Jun24_07-18-59_9476b84f7cc3/events.out.tfevents.1719213548.9476b84f7cc3.77049.1 +3 -0
  19. runs/Jun24_07-19-56_9476b84f7cc3/events.out.tfevents.1719213604.9476b84f7cc3.77049.2 +3 -0
  20. runs/Jun24_07-23-13_9476b84f7cc3/events.out.tfevents.1719213802.9476b84f7cc3.77049.3 +3 -0
  21. runs/Jun24_07-24-02_9476b84f7cc3/events.out.tfevents.1719213851.9476b84f7cc3.84365.0 +3 -0
  22. runs/Jun24_07-24-52_9476b84f7cc3/events.out.tfevents.1719213903.9476b84f7cc3.84365.1 +3 -0
  23. runs/Jun24_07-25-54_9476b84f7cc3/events.out.tfevents.1719213962.9476b84f7cc3.85066.0 +3 -0
  24. runs/Jun24_07-27-03_9476b84f7cc3/events.out.tfevents.1719214028.9476b84f7cc3.85577.0 +3 -0
  25. runs/Jun24_07-30-34_9476b84f7cc3/events.out.tfevents.1719214242.9476b84f7cc3.86614.0 +3 -0
  26. runs/Jun24_07-31-23_9476b84f7cc3/events.out.tfevents.1719214289.9476b84f7cc3.87793.0 +3 -0
  27. runs/Jun24_08-12-53_9476b84f7cc3/events.out.tfevents.1719216782.9476b84f7cc3.87793.1 +3 -0
  28. runs/Jun24_08-55-18_9476b84f7cc3/events.out.tfevents.1719219326.9476b84f7cc3.87793.2 +3 -0
  29. runs/Jun24_09-03-21_9476b84f7cc3/events.out.tfevents.1719219812.9476b84f7cc3.87793.3 +3 -0
  30. runs/Jun24_09-24-07_9476b84f7cc3/events.out.tfevents.1719221050.9476b84f7cc3.87793.4 +3 -0
  31. runs/Jun24_09-27-32_9476b84f7cc3/events.out.tfevents.1719221258.9476b84f7cc3.119298.0 +3 -0
  32. runs/Jun24_10-11-56_9476b84f7cc3/events.out.tfevents.1719223928.9476b84f7cc3.119298.1 +3 -0
  33. runs/Jun24_10-39-02_9476b84f7cc3/events.out.tfevents.1719225550.9476b84f7cc3.138575.0 +3 -0
  34. runs/Jun24_17-20-19_9476b84f7cc3/events.out.tfevents.1719249628.9476b84f7cc3.138575.1 +3 -0
  35. runs/Jun24_17-28-09_9476b84f7cc3/events.out.tfevents.1719250100.9476b84f7cc3.138575.2 +3 -0
  36. runs/Jun24_17-52-03_9476b84f7cc3/events.out.tfevents.1719251532.9476b84f7cc3.138575.3 +3 -0
  37. runs/Jun24_17-57-40_9476b84f7cc3/events.out.tfevents.1719251869.9476b84f7cc3.138575.4 +3 -0
  38. runs/Jun24_18-16-40_9476b84f7cc3/events.out.tfevents.1719253011.9476b84f7cc3.138575.5 +3 -0
  39. training_args.bin +2 -2
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "liminerity/Bitnet-Mistral.0.2-v5",
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
 
1
  {
2
+ "_name_or_path": "liminerity/Bitnet-Mistral.0.2-v3",
3
  "architectures": [
4
  "MistralForCausalLM"
5
  ],
final_model/config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "liminerity/Bitnet-Mistral.0.2-v5",
3
+ "architectures": [
4
+ "MistralForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 256,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 512,
13
+ "max_position_embeddings": 4096,
14
+ "model_type": "mistral",
15
+ "num_attention_heads": 8,
16
+ "num_hidden_layers": 24,
17
+ "num_key_value_heads": 8,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_theta": 1000000.0,
20
+ "sliding_window": null,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.41.2",
24
+ "use_cache": true,
25
+ "vocab_size": 32000
26
+ }
final_model/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.41.2"
6
+ }
final_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2723b55dc532e31d62acf4ec0eeada693872696fd7e62b7c462b38a2802c41e2
3
+ size 128524840
final_model/special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
final_model/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
final_model/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
final_model/tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "legacy": true,
35
+ "model_max_length": 1000000000000000019884624838656,
36
+ "pad_token": "</s>",
37
+ "sp_model_kwargs": {},
38
+ "spaces_between_special_tokens": false,
39
+ "tokenizer_class": "LlamaTokenizer",
40
+ "unk_token": "<unk>",
41
+ "use_default_system_prompt": false
42
+ }
final_model/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fe39a7e59cfc34bf787359c8621875a8af28b822ffcc77eace41356553656a4
3
+ size 5112
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2723b55dc532e31d62acf4ec0eeada693872696fd7e62b7c462b38a2802c41e2
3
  size 128524840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc314570a84931d72186e6dd236fbba46c68749909b5ad0826316e72549a8f2b
3
  size 128524840
runs/Jun24_02-19-21_9476b84f7cc3/events.out.tfevents.1719195601.9476b84f7cc3.891.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b70e4e4ca07e66a050192527675b87cd742af6a721abc26778fbd9f208b47be
3
+ size 4184
runs/Jun24_02-22-03_9476b84f7cc3/events.out.tfevents.1719195730.9476b84f7cc3.3389.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d266891d2260578e0c8030706495e7a965193c247a3706dcbbbd1064b8c6972
3
+ size 41942
runs/Jun24_02-31-50_9476b84f7cc3/events.out.tfevents.1719196316.9476b84f7cc3.3389.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba423311d5b6d2ab1007186b752751a7343a2f6af55f831beed1d445e56b94a3
3
+ size 52809
runs/Jun24_03-01-05_9476b84f7cc3/events.out.tfevents.1719198072.9476b84f7cc3.3389.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:230074bd62b07a3616763f13473a0bb517ce7d10929e046fd0ed589a0f80f368
3
+ size 5507
runs/Jun24_03-04-45_9476b84f7cc3/events.out.tfevents.1719198291.9476b84f7cc3.14166.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b086fc8d4aa4084269f8f8cb1b97db6b26c4590dbb7f74fcb78273e9b21c8cc7
3
+ size 4831
runs/Jun24_03-05-11_9476b84f7cc3/events.out.tfevents.1719198320.9476b84f7cc3.14166.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff58f3ff4acb2be833bf91e9e94639d08d498082c271196d5fdaa7c12994cc5a
3
+ size 97407
runs/Jun24_07-18-11_9476b84f7cc3/events.out.tfevents.1719213497.9476b84f7cc3.77049.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:681505284a98d28e1376657d4e279694458ec788eefca97ad84a56c9bcf7176f
3
+ size 4830
runs/Jun24_07-18-59_9476b84f7cc3/events.out.tfevents.1719213548.9476b84f7cc3.77049.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c67f6b0f022f5a2bebb7fb962090a9167b6cc87c9af5697369f5ca7f3e9af891
3
+ size 5035
runs/Jun24_07-19-56_9476b84f7cc3/events.out.tfevents.1719213604.9476b84f7cc3.77049.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ac18d0402aaba91f350b4be751ef7c945dd5423719173174b36d784e6d0adf6
3
+ size 46788
runs/Jun24_07-23-13_9476b84f7cc3/events.out.tfevents.1719213802.9476b84f7cc3.77049.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d41bde662b3e2d7e38783c4163d6e165b507503abfe85978abbabf823196cf2
3
+ size 4184
runs/Jun24_07-24-02_9476b84f7cc3/events.out.tfevents.1719213851.9476b84f7cc3.84365.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ef0cf4e9f69a5866e9a3557ec2ef088a810e66d752ddbdf4e3b574529326832
3
+ size 4184
runs/Jun24_07-24-52_9476b84f7cc3/events.out.tfevents.1719213903.9476b84f7cc3.84365.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0f1384bc2b38a4e03150ec16bd1c06b2de4d4abfd9881e95f632cf0b5d5b766
3
+ size 4184
runs/Jun24_07-25-54_9476b84f7cc3/events.out.tfevents.1719213962.9476b84f7cc3.85066.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40f90655bef63cd3de5ee71d5475ff8df136af320d5c59cbb4602b4a9ffc209b
3
+ size 4184
runs/Jun24_07-27-03_9476b84f7cc3/events.out.tfevents.1719214028.9476b84f7cc3.85577.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13b685e788ca17b551f1f46c0aab0aa67192e69a85ac2d0ef0e188e0a50f668f
3
+ size 15208
runs/Jun24_07-30-34_9476b84f7cc3/events.out.tfevents.1719214242.9476b84f7cc3.86614.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d34fdfac53c98cf7c680db635694e8dabfba0b740751760f33ed0b983acf74ca
3
+ size 4184
runs/Jun24_07-31-23_9476b84f7cc3/events.out.tfevents.1719214289.9476b84f7cc3.87793.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd0ba5aefe411eacd1752431c4ae4e07ff88e0462c67e93500a6cc087d2ba9cc
3
+ size 140969
runs/Jun24_08-12-53_9476b84f7cc3/events.out.tfevents.1719216782.9476b84f7cc3.87793.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02a187ae636411297ffd80340634fb6916653cd75492e4043e5b47cb35eccc19
3
+ size 140968
runs/Jun24_08-55-18_9476b84f7cc3/events.out.tfevents.1719219326.9476b84f7cc3.87793.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b00d474f6e4ba7023483e7babb1197243e554d4a32a8e0e37bfd4f44dc67438
3
+ size 30262
runs/Jun24_09-03-21_9476b84f7cc3/events.out.tfevents.1719219812.9476b84f7cc3.87793.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:10afd3ea5a106334da1a4ef79b3ed2a34da3ba61d4234e5f2d11bc455d611855
3
+ size 84948
runs/Jun24_09-24-07_9476b84f7cc3/events.out.tfevents.1719221050.9476b84f7cc3.87793.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dcf90571f19aa36b9e4fbb8a4a61efcf7b5f6bf85b0176596ce74a76e17df851
3
+ size 6279
runs/Jun24_09-27-32_9476b84f7cc3/events.out.tfevents.1719221258.9476b84f7cc3.119298.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d42a7b63bac299c16f89b47b96d796bae6b64b72573330f808d763efed852764
3
+ size 140965
runs/Jun24_10-11-56_9476b84f7cc3/events.out.tfevents.1719223928.9476b84f7cc3.119298.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c90950f0ff8d27ef80a70d23395839a8e3c56b481d02258ba1b3517b758d2e0a
3
+ size 86271
runs/Jun24_10-39-02_9476b84f7cc3/events.out.tfevents.1719225550.9476b84f7cc3.138575.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5bcf9837a1434fe89f0465663ccf9caa5a5a6ea9ec5a503430224ee9d24dfe1
3
+ size 1270683
runs/Jun24_17-20-19_9476b84f7cc3/events.out.tfevents.1719249628.9476b84f7cc3.138575.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1335f6de8450fbeb14434804154c5fa18e297b41ca56868c9894690eb3c825c8
3
+ size 11658
runs/Jun24_17-28-09_9476b84f7cc3/events.out.tfevents.1719250100.9476b84f7cc3.138575.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a23dda3a231c33d68c26d1425538dc73a0f34ee327cf808905de1e918b43359
3
+ size 115217
runs/Jun24_17-52-03_9476b84f7cc3/events.out.tfevents.1719251532.9476b84f7cc3.138575.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0fef82a15d50359433b17cbc7e6e19508a2a107b2168c65b449b82d79409aca
3
+ size 26361
runs/Jun24_17-57-40_9476b84f7cc3/events.out.tfevents.1719251869.9476b84f7cc3.138575.4 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf97f74165b0f8e3619fb7f901ae7c94efef07cb1b7b3f7d5a307993d076e76b
3
+ size 101989
runs/Jun24_18-16-40_9476b84f7cc3/events.out.tfevents.1719253011.9476b84f7cc3.138575.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c70f78c1f4b48548b05595493f65c4bb2661d124bed3fa3370344a29d2614d39
3
+ size 226525
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fe39a7e59cfc34bf787359c8621875a8af28b822ffcc77eace41356553656a4
3
- size 5112
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b8375d79a12b46f0f80fef61798e6dc83ad0397be36c50baa6a2c4a13d08214
3
+ size 5176