mgelard committed on
Commit
abcfcdc
·
verified ·
1 Parent(s): e308d04

Add tokenizer config

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +26 -6
tokenizer_config.json CHANGED
@@ -1,12 +1,32 @@
1
  {
2
- "added_tokens_decoder": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  "auto_map": {
4
  "AutoTokenizer": [
5
  "tokenizer.MOJOTokenizer",
6
  null
7
  ]
8
- },
9
- "clean_up_tokenization_spaces": true,
10
- "model_max_length": 1000000000000000019884624838656,
11
- "tokenizer_class": "MOJOTokenizer"
12
- }
 
1
  {
2
+ "tokenizer_class": "MOJOTokenizer",
3
+ "n_expressions_bins": {
4
+ "rnaseq": 64,
5
+ "methylation": 64
6
+ },
7
+ "min_omic_value": {
8
+ "rnaseq": 0.0,
9
+ "methylation": 0.006863548701544
10
+ },
11
+ "max_omic_value": {
12
+ "rnaseq": 1.0,
13
+ "methylation": 0.992831582796998
14
+ },
15
+ "use_max_normalization": {
16
+ "rnaseq": true,
17
+ "methylation": false
18
+ },
19
+ "normalization_factor": {
20
+ "rnaseq": 5.52786861525666,
21
+ "methylation": 1.0
22
+ },
23
+ "prepend_cls_token": false,
24
+ "fixed_sequence_length": 17152,
25
+ "unpadded_length": 17116,
26
  "auto_map": {
27
  "AutoTokenizer": [
28
  "tokenizer.MOJOTokenizer",
29
  null
30
  ]
31
+ }
32
+ }