pglo committed
Commit 88b3455
1 Parent(s): 5a0c420

Upload folder using huggingface_hub

config.json CHANGED
@@ -1,4 +1,5 @@
 {
+  "_name_or_path": "Zyphra/Zamba-7B-v1",
   "architectures": [
     "ZambaForCausalLM"
   ],
@@ -12,6 +13,84 @@
   "hidden_size": 3712,
   "initializer_range": 0.02,
   "intermediate_size": 14848,
+  "layers_block_type": [
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba"
+  ],
   "mamba_conv_bias": true,
   "mamba_d_conv": 4,
   "mamba_d_state": 16,
@@ -25,12 +104,15 @@
   "num_hidden_layers": 76,
   "num_key_value_heads": 16,
   "num_logits_to_keep": 1,
+  "pad_token_id": 0,
   "rms_norm_eps": 1e-05,
   "rope_theta": 10000,
   "sliding_window": null,
+  "time_step_floor": 0.0001,
+  "time_step_max": 0.1,
+  "time_step_min": 0.001,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.42.0.dev0",
-  "unk_token_id": 0,
+  "transformers_version": "4.45.0.dev0",
   "use_cache": true,
   "use_mamba_kernels": true,
   "vocab_size": 32000
generation_config.json CHANGED
@@ -2,5 +2,6 @@
   "_from_model_config": true,
   "bos_token_id": 1,
   "eos_token_id": 2,
-  "transformers_version": "4.42.0.dev0"
+  "pad_token_id": 0,
+  "transformers_version": "4.45.0.dev0"
 }
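generation_config.json now also carries an explicit `pad_token_id` of 0, matching the new `[PAD]` token introduced below, so batched generation no longer needs to fall back to the EOS token for padding. A quick check, assuming the hub repo `Zyphra/Zamba-7B-v1` includes this commit:

```python
from transformers import GenerationConfig

# Assumes the hub repo reflects this commit.
gen_cfg = GenerationConfig.from_pretrained("Zyphra/Zamba-7B-v1")
print(gen_cfg.pad_token_id)                        # 0
print(gen_cfg.bos_token_id, gen_cfg.eos_token_id)  # 1 2
```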
special_tokens_map.json CHANGED
@@ -13,8 +13,8 @@
     "rstrip": false,
     "single_word": false
   },
-  "unk_token": {
-    "content": "<unk>",
+  "pad_token": {
+    "content": "[PAD]",
     "lstrip": false,
     "normalized": false,
     "rstrip": false,
tokenizer.json CHANGED
@@ -1,17 +1,15 @@
 {
   "version": "1.0",
   "truncation": null,
-  "padding": null,
+  "padding": {
+    "strategy": "BatchLongest",
+    "direction": "Left",
+    "pad_to_multiple_of": null,
+    "pad_id": 0,
+    "pad_type_id": 0,
+    "pad_token": "[PAD]"
+  },
   "added_tokens": [
-    {
-      "id": 0,
-      "content": "<unk>",
-      "single_word": false,
-      "lstrip": false,
-      "rstrip": false,
-      "normalized": false,
-      "special": true
-    },
     {
       "id": 1,
       "content": "<s>",
@@ -136,7 +134,7 @@
   "byte_fallback": true,
   "ignore_merges": false,
   "vocab": {
-    "<unk>": 0,
+    "[PAD]": 0,
     "<s>": 1,
     "</s>": 2,
     "<0x00>": 3,
tokenizer_config.json CHANGED
@@ -1,9 +1,10 @@
 {
   "add_bos_token": true,
   "add_eos_token": false,
+  "add_prefix_space": null,
   "added_tokens_decoder": {
     "0": {
-      "content": "<unk>",
+      "content": "[PAD]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
@@ -31,8 +32,9 @@
   "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
   "eos_token": "</s>",
+  "legacy": true,
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": null,
+  "pad_token": "[PAD]",
   "sp_model_kwargs": {},
   "spaces_between_special_tokens": false,
   "tokenizer_class": "LlamaTokenizer",