Lysa committed on
Commit
4dcf42e
·
1 Parent(s): 4c3564c
Files changed (2) hide show
  1. config.json +15 -25
  2. pytorch_model.bin +2 -2
config.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "_name_or_path": "sshleifer/distilbart-xsum-12-1",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "gelu",
6
  "add_bias_logits": false,
7
- "add_final_layer_norm": false,
8
  "architectures": [
9
- "BartForConditionalGeneration"
10
  ],
11
  "attention_dropout": 0.0,
12
  "bos_token_id": 0,
@@ -16,19 +16,13 @@
16
  "decoder_attention_heads": 16,
17
  "decoder_ffn_dim": 4096,
18
  "decoder_layerdrop": 0.0,
19
- "decoder_layers": 1,
20
- "decoder_start_token_id": 2,
21
  "dropout": 0.1,
22
- "early_stopping": true,
23
  "encoder_attention_heads": 16,
24
  "encoder_ffn_dim": 4096,
25
  "encoder_layerdrop": 0.0,
26
  "encoder_layers": 12,
27
  "eos_token_id": 2,
28
- "eos_token_ids": [
29
- 2
30
- ],
31
- "extra_pos_embeddings": 2,
32
  "forced_eos_token_id": 2,
33
  "gradient_checkpointing": false,
34
  "id2label": {
@@ -43,27 +37,23 @@
43
  "LABEL_1": 1,
44
  "LABEL_2": 2
45
  },
46
- "length_penalty": 0.5,
47
- "max_length": 62,
48
  "max_position_embeddings": 1024,
49
- "min_length": 11,
50
- "model_type": "bart",
51
- "no_repeat_ngram_size": 3,
52
- "normalize_before": false,
53
  "normalize_embedding": true,
54
- "num_beams": 6,
55
  "num_hidden_layers": 12,
56
  "output_past": true,
57
  "pad_token_id": 1,
58
- "prefix": " ",
59
- "replacing_rate": 0,
60
- "save_step": 52,
61
- "scale_embedding": false,
62
  "static_position_embeddings": false,
63
- "student_decoder_layers": null,
64
- "student_encoder_layers": null,
65
- "task_specific_params": {},
 
 
66
  "transformers_version": "4.6.1",
67
  "use_cache": true,
68
- "vocab_size": 50264
69
  }
 
1
  {
2
+ "_name_or_path": "facebook/mbart-large-cc25",
3
  "_num_labels": 3,
4
  "activation_dropout": 0.0,
5
  "activation_function": "gelu",
6
  "add_bias_logits": false,
7
+ "add_final_layer_norm": true,
8
  "architectures": [
9
+ "MBartForConditionalGeneration"
10
  ],
11
  "attention_dropout": 0.0,
12
  "bos_token_id": 0,
 
16
  "decoder_attention_heads": 16,
17
  "decoder_ffn_dim": 4096,
18
  "decoder_layerdrop": 0.0,
19
+ "decoder_layers": 12,
 
20
  "dropout": 0.1,
 
21
  "encoder_attention_heads": 16,
22
  "encoder_ffn_dim": 4096,
23
  "encoder_layerdrop": 0.0,
24
  "encoder_layers": 12,
25
  "eos_token_id": 2,
 
 
 
 
26
  "forced_eos_token_id": 2,
27
  "gradient_checkpointing": false,
28
  "id2label": {
 
37
  "LABEL_1": 1,
38
  "LABEL_2": 2
39
  },
40
+ "max_length": 1024,
 
41
  "max_position_embeddings": 1024,
42
+ "model_type": "mbart",
43
+ "normalize_before": true,
 
 
44
  "normalize_embedding": true,
45
+ "num_beams": 5,
46
  "num_hidden_layers": 12,
47
  "output_past": true,
48
  "pad_token_id": 1,
49
+ "scale_embedding": true,
 
 
 
50
  "static_position_embeddings": false,
51
+ "task_specific_params": {
52
+ "translation_en_to_ro": {
53
+ "decoder_start_token_id": 250020
54
+ }
55
+ },
56
  "transformers_version": "4.6.1",
57
  "use_cache": true,
58
+ "vocab_size": 250027
59
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec7657d3380f68ebce97da0c6411dccc3cdd01797b91aa0361495c0b85e7c25f
3
- size 886398433
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7620da2177e1578d7c02c6bf4b3745ec7826ab7df89b470fabc60cee6f6af908
3
+ size 2444611475