lmbelo committed on
Commit
a28b38e
·
verified ·
1 Parent(s): ebab275

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +18 -0
  2. tokenizer_config.json +17 -1
tokenizer.json CHANGED
@@ -47,6 +47,24 @@
47
  "rstrip": false,
48
  "normalized": true,
49
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "normalizer": {
 
47
  "rstrip": false,
48
  "normalized": true,
49
  "special": false
50
+ },
51
+ {
52
+ "id": 32002,
53
+ "content": "<pad>",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
+ },
60
+ {
61
+ "id": 32003,
62
+ "content": "<eot_id>",
63
+ "single_word": false,
64
+ "lstrip": false,
65
+ "rstrip": false,
66
+ "normalized": true,
67
+ "special": false
68
  }
69
  ],
70
  "normalizer": {
tokenizer_config.json CHANGED
@@ -41,10 +41,26 @@
41
  "rstrip": false,
42
  "single_word": false,
43
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  }
45
  },
46
  "bos_token": "<s>",
47
- "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
48
  "clean_up_tokenization_spaces": false,
49
  "eos_token": "</s>",
50
  "model_max_length": 1000000000000000019884624838656,
 
41
  "rstrip": false,
42
  "single_word": false,
43
  "special": false
44
+ },
45
+ "32002": {
46
+ "content": "<pad>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "32003": {
54
+ "content": "<eot_id>",
55
+ "lstrip": false,
56
+ "normalized": true,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": false
60
  }
61
  },
62
  "bos_token": "<s>",
63
+ "chat_template": "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<start_header_id>' + message['role'] + '<end_header_id>\n\n'+ message['content'] | trim + '<eot_id>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<start_header_id>assistant<end_header_id>\n\n' }}{% endif %}",
64
  "clean_up_tokenization_spaces": false,
65
  "eos_token": "</s>",
66
  "model_max_length": 1000000000000000019884624838656,