pardeepSF committed · Commit 5ee1b2d · 1 Parent(s): 66e168c

previous commit of bert vendor

config.json CHANGED
@@ -1,30 +1,27 @@
 {
-  "_name_or_path": "microsoft/layoutlm-large-uncased",
+  "_name_or_path": "microsoft/layoutlm-base-uncased",
   "architectures": [
     "LayoutLMForQuestionAnswering"
   ],
   "attention_probs_dropout_prob": 0.1,
-  "bos_token_id": 0,
   "classifier_dropout": null,
-  "eos_token_id": 2,
-  "gradient_checkpointing": false,
   "hidden_act": "gelu",
   "hidden_dropout_prob": 0.1,
   "hidden_size": 768,
   "initializer_range": 0.02,
   "intermediate_size": 3072,
-  "layer_norm_eps": 1e-05,
+  "layer_norm_eps": 1e-12,
   "max_2d_position_embeddings": 1024,
-  "max_position_embeddings": 514,
-  "model_type": "roberta",
+  "max_position_embeddings": 512,
+  "model_type": "bert",
   "num_attention_heads": 12,
   "num_hidden_layers": 12,
-  "pad_token_id": 1,
+  "output_past": true,
+  "pad_token_id": 0,
   "position_embedding_type": "absolute",
-  "tokenizer_class": "RobertaTokenizer",
   "torch_dtype": "float32",
-  "transformers_version": "4.23.1",
+  "transformers_version": "4.22.2",
   "type_vocab_size": 2,
   "use_cache": true,
-  "vocab_size": 50265
+  "vocab_size": 30522
 }
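The net effect of this hunk is a switch from a RoBERTa-flavored LayoutLM config (50,265-token vocab, 514 positions, 1e-05 layer-norm epsilon) to the stock BERT-flavored microsoft/layoutlm-base-uncased config (30,522-token WordPiece vocab, 512 positions, 1e-12 epsilon). A minimal sanity-check sketch, assuming "./checkout" is a hypothetical local clone of this repo rather than a published model id:

# Minimal sketch, assuming "./checkout" is a local clone of this repo
# (hypothetical path, not a published model id).
from transformers import AutoConfig

config = AutoConfig.from_pretrained("./checkout")
assert config.model_type == "bert"            # was "roberta"
assert config.vocab_size == 30522             # was 50265
assert config.max_position_embeddings == 512  # was 514
assert config.layer_norm_eps == 1e-12         # was 1e-05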
merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76321879ebf281a0bdc735ca330337451d10e180ac3fd021a704b00137510021
-size 511247909
+oid sha256:1802e85d60683f7abd7e63417d78579ceb47cc176ac574eb2ad67716f8f5b9aa
+size 450591205
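These are Git LFS pointer files, not the weights themselves: each pointer records the sha256 oid and byte size of the real binary. The drop from ~511 MB to ~451 MB is consistent with the smaller embedding matrix ((50265 - 30522) rows x 768 dims x 4 bytes is roughly 60.7 MB). A sketch for verifying a resolved download against the new pointer, assuming pytorch_model.bin is already fetched locally:

# Sketch: verify a downloaded pytorch_model.bin against the pointer's
# sha256 oid and size (both values taken from the diff above).
import hashlib
import os

EXPECTED_OID = "1802e85d60683f7abd7e63417d78579ceb47cc176ac574eb2ad67716f8f5b9aa"
EXPECTED_SIZE = 450591205

path = "pytorch_model.bin"  # assumed local download of the LFS object
assert os.path.getsize(path) == EXPECTED_SIZE

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
assert h.hexdigest() == EXPECTED_OID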
special_tokens_map.json CHANGED
@@ -1,15 +1,7 @@
 {
-  "bos_token": "<s>",
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "mask_token": {
-    "content": "<mask>",
-    "lstrip": true,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": "<pad>",
-  "sep_token": "</s>",
-  "unk_token": "<unk>"
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
 }
tokenizer_config.json CHANGED
@@ -1,65 +1,16 @@
 {
-  "add_prefix_space": true,
-  "bos_token": {
-    "__type": "AddedToken",
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "cls_token": {
-    "__type": "AddedToken",
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
+  "cls_token": "[CLS]",
+  "do_basic_tokenize": true,
   "do_lower_case": true,
-  "eos_token": {
-    "__type": "AddedToken",
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "errors": "replace",
-  "mask_token": {
-    "__type": "AddedToken",
-    "content": "<mask>",
-    "lstrip": true,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "model_max_length": 512,
-  "name_or_path": "impira/layoutlm-document-qa",
-  "pad_token": {
-    "__type": "AddedToken",
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "__type": "AddedToken",
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  },
-  "special_tokens_map_file": null,
-  "tokenizer_class": "RobertaTokenizer",
-  "unk_token": {
-    "__type": "AddedToken",
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": true,
-    "rstrip": false,
-    "single_word": false
-  }
+  "mask_token": "[MASK]",
+  "max_len": 512,
+  "name_or_path": "microsoft/layoutlm-base-uncased",
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "special_tokens_map_file": "data/models/models--microsoft--layoutlm-base-uncased/snapshots/ca841ce8d2f46b13b0ac3f635b8eb7d2e1d758d5/special_tokens_map.json",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
 }
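Together with the special_tokens_map.json change above, this swaps the byte-level RobertaTokenizer (<s>/</s> framing, backed by a merges.txt + vocab.json pair) for a WordPiece BertTokenizer ([CLS]/[SEP] framing, backed by a single vocab.txt), which is why merges.txt and vocab.json are deleted and vocab.txt is added elsewhere in this commit. A minimal loading sketch, again assuming the hypothetical "./checkout" path; note that AutoTokenizer resolves to the fast variant by default:

# Minimal sketch, assuming "./checkout" is a local clone of this repo.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("./checkout")
print(type(tok).__name__)  # BertTokenizerFast (was RobertaTokenizer)
print(tok.cls_token, tok.sep_token, tok.pad_token, tok.unk_token)
# [CLS] [SEP] [PAD] [UNK]
ids = tok("hello world")["input_ids"]
print(ids[0])  # the [CLS] id, typically 101 in the BERT uncased vocab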
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9518cb20ba30d0bc101127dbdfff2574f450d60c3031967b86272a24d6ee0123
-size 1583
+oid sha256:c06707d9a8295ab1480fa3cb6ee9e537946e006fb9e60869dbbc0b7422ae8b00
+size 1519
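training_args.bin is the pickled TrainingArguments object that the transformers Trainer saves alongside a checkpoint; the new pointer presumably reflects a re-serialization from the run that produced these weights. A hedged sketch for inspecting it, assuming a local download and a transformers version compatible with the 4.22.x one that wrote it:

# Sketch: inspect the serialized TrainingArguments. torch.load unpickles
# arbitrary objects, so only run this on files you trust.
import torch

args = torch.load("training_args.bin")  # assumed local download
print(args.learning_rate, args.num_train_epochs)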
vocab.json DELETED
The diff for this file is too large to render. See raw diff
 
vocab.txt ADDED
The diff for this file is too large to render. See raw diff