Wayer committed on
Commit
900a481
·
verified ·
1 Parent(s): 2b3ec35

Upload tokenizer and config from StableLM

Browse files
Files changed (3) hide show
  1. config.json +25 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +46 -0
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "StableLmForCausalLM"
4
+ ],
5
+ "bos_token_id": 100257,
6
+ "eos_token_id": 100257,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 2048,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 5632,
11
+ "max_position_embeddings": 4096,
12
+ "model_type": "stablelm",
13
+ "layer_norm_eps": 1e-05,
14
+ "num_attention_heads": 32,
15
+ "num_hidden_layers": 24,
16
+ "num_key_value_heads": 32,
17
+ "partial_rotary_factor": 0.25,
18
+ "rope_theta": 10000,
19
+ "tie_word_embeddings": false,
20
+ "torch_dtype": "float16",
21
+ "transformers_version": "4.38.0",
22
+ "use_cache": true,
23
+ "use_qkv_bias": true,
24
+ "vocab_size": 100352
25
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "additional_special_tokens": [
4
+ "<|reg_extra|>",
5
+ "<|endoftext|>",
6
+ "<|fim_prefix|>",
7
+ "<|fim_middle|>",
8
+ "<|fim_suffix|>",
9
+ "<|fim_pad|>",
10
+ "<gh_stars>",
11
+ "<filename>",
12
+ "<issue_start>",
13
+ "<issue_comment>",
14
+ "<issue_closed>",
15
+ "<jupyter_start>",
16
+ "<jupyter_text>",
17
+ "<jupyter_code>",
18
+ "<jupyter_output>",
19
+ "<empty_output>",
20
+ "<commit_before>",
21
+ "<commit_msg>",
22
+ "<commit_after>",
23
+ "<reponame>",
24
+ "<|endofprompt|>",
25
+ "<|im_start|>",
26
+ "<|im_end|>",
27
+ "<|pause|>",
28
+ "<|reg0|>",
29
+ "<|reg1|>",
30
+ "<|reg2|>",
31
+ "<|reg3|>",
32
+ "<|reg4|>",
33
+ "<|reg5|>",
34
+ "<|reg6|>",
35
+ "<|reg7|>",
36
+ "<|extra0|>"
37
+ ],
38
+ "bos_token": "<|endoftext|>",
39
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
40
+ "clean_up_tokenization_spaces": true,
41
+ "eos_token": "<|endoftext|>",
42
+ "tokenizer_class": "GPT2TokenizerFast",
43
+ "model_max_length": 4096,
44
+ "pad_token": "<|endoftext|>",
45
+ "unk_token": "<|endoftext|>"
46
+ }