heldJan committed on
Commit
d41daa7
·
verified ·
1 Parent(s): 45e2508

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,8 +1,5 @@
1
  {
2
  "<im_end>": 32002,
3
  "<im_patch>": 32000,
4
- "<im_start>": 32001,
5
- "<vid_end>": 32005,
6
- "<vid_patch>": 32003,
7
- "<vid_start>": 32004
8
  }
 
1
  {
2
  "<im_end>": 32002,
3
  "<im_patch>": 32000,
4
+ "<im_start>": 32001
 
 
 
5
  }
special_tokens_map.json CHANGED
@@ -1,27 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- {
4
- "content": "<vid_patch>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "<vid_start>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- },
17
- {
18
- "content": "<vid_end>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- }
24
- ],
25
  "bos_token": {
26
  "content": "<s>",
27
  "lstrip": false,
@@ -36,13 +13,7 @@
36
  "rstrip": false,
37
  "single_word": false
38
  },
39
- "pad_token": {
40
- "content": "<unk>",
41
- "lstrip": false,
42
- "normalized": false,
43
- "rstrip": false,
44
- "single_word": false
45
- },
46
  "unk_token": {
47
  "content": "<unk>",
48
  "lstrip": false,
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "bos_token": {
3
  "content": "<s>",
4
  "lstrip": false,
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": "</s>",
 
 
 
 
 
 
17
  "unk_token": {
18
  "content": "<unk>",
19
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -49,44 +49,14 @@
49
  "rstrip": false,
50
  "single_word": false,
51
  "special": false
52
- },
53
- "32003": {
54
- "content": "<vid_patch>",
55
- "lstrip": false,
56
- "normalized": false,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": true
60
- },
61
- "32004": {
62
- "content": "<vid_start>",
63
- "lstrip": false,
64
- "normalized": false,
65
- "rstrip": false,
66
- "single_word": false,
67
- "special": true
68
- },
69
- "32005": {
70
- "content": "<vid_end>",
71
- "lstrip": false,
72
- "normalized": false,
73
- "rstrip": false,
74
- "single_word": false,
75
- "special": true
76
  }
77
  },
78
- "additional_special_tokens": [
79
- "<vid_patch>",
80
- "<vid_start>",
81
- "<vid_end>"
82
- ],
83
  "bos_token": "<s>",
84
  "clean_up_tokenization_spaces": false,
85
  "eos_token": "</s>",
86
  "legacy": true,
87
- "max_length": 64,
88
- "model_max_length": 1048,
89
- "pad_token": "<unk>",
90
  "padding_side": "right",
91
  "sp_model_kwargs": {},
92
  "spaces_between_special_tokens": false,
 
49
  "rstrip": false,
50
  "single_word": false,
51
  "special": false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  }
53
  },
 
 
 
 
 
54
  "bos_token": "<s>",
55
  "clean_up_tokenization_spaces": false,
56
  "eos_token": "</s>",
57
  "legacy": true,
58
+ "model_max_length": 2048,
59
+ "pad_token": "</s>",
 
60
  "padding_side": "right",
61
  "sp_model_kwargs": {},
62
  "spaces_between_special_tokens": false,