NorGLM committed on
Commit
5cbfedc
1 Parent(s): 542497f

Upload 7 files

Browse files
Files changed (7) hide show
  1. README +4 -0
  2. config.json +36 -0
  3. meta.json +86 -0
  4. pytorch_model.bin +3 -0
  5. tf_model.h5 +3 -0
  6. tokenizer_config.json +3 -0
  7. vocab.txt +0 -0
README ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ This archive is part of the NLPL Word Vectors Repository (http://vectors.nlpl.eu/repository/), version 2.0, published on Friday, December 27, 2019.
2
+ Please see the file 'meta.json' in this archive and the overall repository metadata file http://vectors.nlpl.eu/repository/20.json for additional information.
3
+ The life-time identifier for this model is:
4
+ http://vectors.nlpl.eu/repository/20/221.zip
config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "ltg/norbert2",
3
+ "architectures": [
4
+ "BertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "hidden_act": "gelu",
9
+ "hidden_dropout_prob": 0.1,
10
+ "hidden_size": 768,
11
+ "id2label": {
12
+ "0": "LABEL_0",
13
+ "1": "LABEL_1",
14
+ "2": "LABEL_2"
15
+ },
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 3072,
18
+ "label2id": {
19
+ "LABEL_0": 0,
20
+ "LABEL_1": 1,
21
+ "LABEL_2": 2
22
+ },
23
+ "layer_norm_eps": 1e-12,
24
+ "max_position_embeddings": 512,
25
+ "model_type": "bert",
26
+ "num_attention_heads": 12,
27
+ "num_hidden_layers": 12,
28
+ "pad_token_id": 0,
29
+ "position_embedding_type": "absolute",
30
+ "problem_type": "single_label_classification",
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.30.0.dev0",
33
+ "type_vocab_size": 2,
34
+ "use_cache": true,
35
+ "vocab_size": 50104
36
+ }
meta.json ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "algorithm": {
3
+ "id": 9,
4
+ "name": "BERT",
5
+ "url": "https://github.com/google-research/bert",
6
+ "version": null
7
+ },
8
+ "contents": [
9
+ {
10
+ "filename": "config.json",
11
+ "format": "json"
12
+ },
13
+ {
14
+ "filename": "tokenizer_config.json",
15
+ "format": "json"
16
+ },
17
+ {
18
+ "filename": "model.ckpt.index",
19
+ "format": "data"
20
+ },
21
+ {
22
+ "filename": "model.ckpt.data-00000-of-00001",
23
+ "format": "data"
24
+ },
25
+ {
26
+ "filename": "meta.json",
27
+ "format": "json"
28
+ },
29
+ {
30
+ "filename": "vocab.txt",
31
+ "format": "text"
32
+ },
33
+ {
34
+ "filename": "pytorch_model.bin",
35
+ "format": "data"
36
+ },
37
+ {
38
+ "filename": "tf_model.h5",
39
+ "format": "data"
40
+ }
41
+ ],
42
+ "corpus": [
43
+ {
44
+ "NER": false,
45
+ "case preserved": true,
46
+ "description": "Norwegian Colossal Corpus (NCC)",
47
+ "id": 126,
48
+ "language": "nor",
49
+ "lemmatized": false,
50
+ "public": true,
51
+ "stop words removal": null,
52
+ "tagger": null,
53
+ "tagset": null,
54
+ "tokens": 5000000000,
55
+ "tool": null,
56
+ "url": "https://huggingface.co/datasets/NbAiLab/NCC"
57
+ },
58
+ {
59
+ "NER": false,
60
+ "case preserved": true,
61
+ "description": "C4 Web Corpus",
62
+ "id": 127,
63
+ "language": "nor",
64
+ "lemmatized": false,
65
+ "public": true,
66
+ "stop words removal": null,
67
+ "tagger": null,
68
+ "tagset": null,
69
+ "tokens": 9500000000,
70
+ "tool": "https://github.com/allenai/allennlp/discussions/5265",
71
+ "url": "https://aclanthology.org/2021.naacl-main.41/"
72
+ }
73
+ ],
74
+ "creators": [
75
+ {
76
+ "email": "[email protected]",
77
+ "name": "Andrey Kutuzov"
78
+ }
79
+ ],
80
+ "dimensions": 768,
81
+ "documentation": "http://norlm.nlpl.eu",
82
+ "external_id": "Cased Norwegian BERT Base 2.0 (NorBERT 2)",
83
+ "handle": "http://vectors.nlpl.eu/repository/20/221.zip",
84
+ "id": 221,
85
+ "iterations": 3
86
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adfd656074c1de762820156f2235545a10ab3807dc05c6faefa58cb85da8ac35
3
+ size 498166901
tf_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8741bc2e9666330a411b6e2d1d59280e31c047d141b73f82976ee3d2445b7449
3
+ size 498346776
tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "do_lower_case": false
3
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff