dyh2111 committed
Commit e608275
1 Parent(s): 351844e

Upload tokenizer

Files changed (4)
  1. README.md +7 -6
  2. special_tokens_map.json +1 -1
  3. tokenizer.json +0 -0
  4. tokenizer_config.json +39 -24
README.md CHANGED
@@ -1,13 +1,14 @@
 ---
 extra_gated_heading: Access Llama 2 on Hugging Face
-extra_gated_description: >-
-  This is a form to enable access to Llama 2 on Hugging Face after you have been
-  granted access from Meta. Please visit the [Meta website](https://ai.meta.com/resources/models-and-libraries/llama-downloads) and accept our
-  license terms and acceptable use policy before submitting this form. Requests
-  will be processed in 1-2 days.
+extra_gated_description: This is a form to enable access to Llama 2 on Hugging Face
+  after you have been granted access from Meta. Please visit the [Meta website](https://ai.meta.com/resources/models-and-libraries/llama-downloads)
+  and accept our license terms and acceptable use policy before submitting this form.
+  Requests will be processed in 1-2 days.
 extra_gated_button_content: Submit
 extra_gated_fields:
-  I agree to share my name, email address and username with Meta and confirm that I have already been granted download access on the Meta website: checkbox
+  ? I agree to share my name, email address and username with Meta and confirm that
+    I have already been granted download access on the Meta website
+  : checkbox
 language:
 - en
 pipeline_tag: text-generation
special_tokens_map.json CHANGED
@@ -13,7 +13,7 @@
     "rstrip": false,
     "single_word": false
   },
-  "pad_token": "<unk>",
+  "pad_token": "</s>",
  "unk_token": {
     "content": "<unk>",
     "lstrip": false,
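Note: the only functional change in special_tokens_map.json is that padding now falls back to the end-of-sequence token instead of <unk>. A minimal sketch of how one might confirm this after the commit, assuming the transformers library; the repo id below is a placeholder, not part of this commit:

```python
from transformers import AutoTokenizer

# "your-org/your-llama2-finetune" is a placeholder; substitute the actual repo id.
tok = AutoTokenizer.from_pretrained("your-org/your-llama2-finetune")

print(tok.pad_token, tok.pad_token_id)  # expected: </s> with id 2 (</s> per the diff below)
assert tok.pad_token == tok.eos_token   # padding now reuses the EOS token
```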
tokenizer.json CHANGED
The diff for this file is too large to render; see the raw diff.
 
tokenizer_config.json CHANGED
@@ -1,34 +1,49 @@
 {
   "add_bos_token": true,
   "add_eos_token": false,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "32000": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
   },
+  "bos_token": "<s>",
   "clean_up_tokenization_spaces": false,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
+  "eos_token": "</s>",
   "legacy": false,
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": null,
+  "pad_token": "</s>",
   "sp_model_kwargs": {},
   "tokenizer_class": "LlamaTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
 }
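Note: the rewritten tokenizer_config.json follows the newer serialization style, declaring special tokens once under "added_tokens_decoder" (keyed by token id) and referencing them as plain strings, with the previously null "pad_token" now set to "</s>". A small sketch, assuming the updated file is available locally, that prints the mapping shown in the diff above:

```python
import json

# Read the updated config locally (path assumed; adjust as needed).
with open("tokenizer_config.json") as f:
    cfg = json.load(f)

# Token ids and contents come straight from the diff above.
for token_id, entry in sorted(cfg["added_tokens_decoder"].items(), key=lambda kv: int(kv[0])):
    print(token_id, entry["content"], "special" if entry["special"] else "non-special")
# 0 <unk> special, 1 <s> special, 2 </s> special, 32000 <pad> non-special

print(cfg["bos_token"], cfg["eos_token"], cfg["pad_token"], cfg["unk_token"])
# -> <s> </s> </s> <unk>
```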