euclaise committed on
Commit
c69511d
1 Parent(s): d7c1227

Upload tokenizer

Browse files
Files changed (2) hide show
  1. README.md +4 -4
  2. tokenizer_config.json +1 -0
README.md CHANGED
@@ -1,13 +1,13 @@
1
  ---
2
  license: cc-by-sa-3.0
3
- datasets:
4
- - euclaise/TinyCoT
5
- - euclaise/reddit-instruct
6
- - sablo/oasst2_curated
7
  library_name: transformers
8
  tags:
9
  - supertrainer2000
10
  - human-data
 
 
 
 
11
  metrics:
12
  - accuracy
13
  ---
 
1
  ---
2
  license: cc-by-sa-3.0
 
 
 
 
3
  library_name: transformers
4
  tags:
5
  - supertrainer2000
6
  - human-data
7
+ datasets:
8
+ - euclaise/TinyCoT
9
+ - euclaise/reddit-instruct
10
+ - sablo/oasst2_curated
11
  metrics:
12
  - accuracy
13
  ---
tokenizer_config.json CHANGED
@@ -203,6 +203,7 @@
203
  }
204
  },
205
  "bos_token": "<|endoftext|>",
 
206
  "clean_up_tokenization_spaces": true,
207
  "eos_token": "<|endoftext|>",
208
  "model_max_length": 1000000000000000019884624838656,
 
203
  }
204
  },
205
  "bos_token": "<|endoftext|>",
206
+ "chat_template": "{% for message in messages %}{% if message['role'] == 'user' %}{{ '### User:\n' + message['content'].strip() + '\n' }}{% elif message['role'] == 'assistant' %}{{ '### Assistant:\n' + message['content'].strip() + '\n' }}{% endif %}{% endfor %}",
207
  "clean_up_tokenization_spaces": true,
208
  "eos_token": "<|endoftext|>",
209
  "model_max_length": 1000000000000000019884624838656,