InstaDeepAI
/

BulkRNABert

Feature Extraction

transcriptomics

Model card Files Files and versions Community

mgelard committed on Jun 25

Commit

27242ad

·

verified ·

1 Parent(s): f959c61

Update tokenizer_config.json

Files changed (1) hide show

tokenizer_config.json +9 -65

tokenizer_config.json CHANGED Viewed

@@ -1,67 +1,11 @@
 {
-  "added_tokens_decoder": {
-    "64": {
-      "content": "<pad>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "65": {
-      "content": "<mask>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "66": {
-      "content": "<unk>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "67": {
-      "content": "<bos>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "68": {
-      "content": "<eos>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "69": {
-      "content": "<cls>",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    }
-  },
-  "auto_map": {
-    "AutoTokenizer": [
-      "tokenizer.BinnedOmicTokenizer",
-      null
-    ]
-  },
-  "bos_token": "<bos>",
-  "clean_up_tokenization_spaces": true,
-  "cls_token": "<cls>",
-  "eos_token": "<eos>",
-  "mask_token": "<mask>",
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<pad>",
   "tokenizer_class": "BinnedOmicTokenizer",
-  "unk_token": "<unk>"
-}

 {
   "tokenizer_class": "BinnedOmicTokenizer",
+  "n_expressions_bins": 64,
+  "min_omic_value": 0.0,
+  "max_omic_value": 1.0,
+  "use_max_normalization": true,
+  "normalization_factor": 5.547176906585117,
+  "prepend_cls_token": false,
+  "fixed_sequence_length": null,
+  "unpadded_length": null
+}