Vivaan21 committed on
Commit
7fcca8d
·
verified ·
1 Parent(s): 0a0562a

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -1,9 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- "<unk>",
4
- "<s>",
5
- "</s>"
6
- ],
7
  "bos_token": {
8
  "content": "<s>",
9
  "lstrip": false,
 
1
  {
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
tokenizer.json CHANGED
@@ -1,7 +1,19 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 2048,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": "BatchLongest",
11
+ "direction": "Left",
12
+ "pad_to_multiple_of": null,
13
+ "pad_id": 2,
14
+ "pad_type_id": 0,
15
+ "pad_token": "</s>"
16
+ },
17
  "added_tokens": [
18
  {
19
  "id": 0,
tokenizer_config.json CHANGED
@@ -27,18 +27,14 @@
27
  "special": true
28
  }
29
  },
30
- "additional_special_tokens": [
31
- "<unk>",
32
- "<s>",
33
- "</s>"
34
- ],
35
  "bos_token": "<s>",
 
36
  "clean_up_tokenization_spaces": false,
37
  "eos_token": "</s>",
38
  "legacy": true,
39
  "model_max_length": 1000000000000000019884624838656,
40
  "pad_token": "</s>",
41
- "padding_side": "left",
42
  "sp_model_kwargs": {},
43
  "spaces_between_special_tokens": false,
44
  "tokenizer_class": "LlamaTokenizer",
 
27
  "special": true
28
  }
29
  },
30
+ "additional_special_tokens": [],
 
 
 
 
31
  "bos_token": "<s>",
32
+ "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token + ' ' }}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
33
  "clean_up_tokenization_spaces": false,
34
  "eos_token": "</s>",
35
  "legacy": true,
36
  "model_max_length": 1000000000000000019884624838656,
37
  "pad_token": "</s>",
 
38
  "sp_model_kwargs": {},
39
  "spaces_between_special_tokens": false,
40
  "tokenizer_class": "LlamaTokenizer",