nicolauduran45 committed on
Commit
bfeadd2
·
verified ·
1 Parent(s): 80880e2

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "[BGEO]": 119547,
3
- "[EGEO]": 119548
4
  }
 
1
  {
2
+ "[END]": 28997,
3
+ "[START]": 28996
4
  }
special_tokens_map.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "additional_special_tokens": [
3
  {
4
- "content": "[BGEO]",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
  {
11
- "content": "[EGEO]",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
 
1
  {
2
  "additional_special_tokens": [
3
  {
4
+ "content": "[START]",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false
9
  },
10
  {
11
+ "content": "[END]",
12
  "lstrip": false,
13
  "normalized": false,
14
  "rstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -40,16 +40,16 @@
40
  "single_word": false,
41
  "special": true
42
  },
43
- "119547": {
44
- "content": "[BGEO]",
45
  "lstrip": false,
46
  "normalized": false,
47
  "rstrip": false,
48
  "single_word": false,
49
  "special": true
50
  },
51
- "119548": {
52
- "content": "[EGEO]",
53
  "lstrip": false,
54
  "normalized": false,
55
  "rstrip": false,
@@ -58,10 +58,10 @@
58
  }
59
  },
60
  "additional_special_tokens": [
61
- "[BGEO]",
62
- "[EGEO]"
63
  ],
64
- "clean_up_tokenization_spaces": true,
65
  "cls_token": "[CLS]",
66
  "do_lower_case": false,
67
  "mask_token": "[MASK]",
 
40
  "single_word": false,
41
  "special": true
42
  },
43
+ "28996": {
44
+ "content": "[START]",
45
  "lstrip": false,
46
  "normalized": false,
47
  "rstrip": false,
48
  "single_word": false,
49
  "special": true
50
  },
51
+ "28997": {
52
+ "content": "[END]",
53
  "lstrip": false,
54
  "normalized": false,
55
  "rstrip": false,
 
58
  }
59
  },
60
  "additional_special_tokens": [
61
+ "[START]",
62
+ "[END]"
63
  ],
64
+ "clean_up_tokenization_spaces": false,
65
  "cls_token": "[CLS]",
66
  "do_lower_case": false,
67
  "mask_token": "[MASK]",
vocab.txt CHANGED
The diff for this file is too large to render. See raw diff