|
!ModelConfig |
|
config_data: !DataConfig |
|
data_statistics: !DataStatistics |
|
average_len_target_per_bucket: |
|
- 6.011277314861252 |
|
- 10.39671010650862 |
|
- 18.504618481112797 |
|
- 26.041977712863865 |
|
- 32.48293857888395 |
|
- 41.60258525852575 |
|
- 51.3669121514993 |
|
- 59.76513060097686 |
|
- 68.25156398104271 |
|
- 75.67976141505562 |
|
- 83.09476309226905 |
|
- 21.1875 |
|
- 7.749999999999999 |
|
- 5.833333333333333 |
|
- 6.0 |
|
- 7.0 |
|
- 7.25 |
|
- 6.333333333333333 |
|
- 7.0 |
|
- 5.0 |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
- null |
|
buckets: |
|
- !!python/tuple |
|
- 8 |
|
- 8 |
|
- !!python/tuple |
|
- 16 |
|
- 16 |
|
- !!python/tuple |
|
- 24 |
|
- 24 |
|
- !!python/tuple |
|
- 32 |
|
- 32 |
|
- !!python/tuple |
|
- 40 |
|
- 40 |
|
- !!python/tuple |
|
- 48 |
|
- 48 |
|
- !!python/tuple |
|
- 56 |
|
- 56 |
|
- !!python/tuple |
|
- 64 |
|
- 64 |
|
- !!python/tuple |
|
- 72 |
|
- 72 |
|
- !!python/tuple |
|
- 80 |
|
- 80 |
|
- !!python/tuple |
|
- 88 |
|
- 88 |
|
- !!python/tuple |
|
- 96 |
|
- 96 |
|
- !!python/tuple |
|
- 104 |
|
- 104 |
|
- !!python/tuple |
|
- 112 |
|
- 112 |
|
- !!python/tuple |
|
- 120 |
|
- 120 |
|
- !!python/tuple |
|
- 128 |
|
- 128 |
|
- !!python/tuple |
|
- 136 |
|
- 136 |
|
- !!python/tuple |
|
- 144 |
|
- 144 |
|
- !!python/tuple |
|
- 152 |
|
- 152 |
|
- !!python/tuple |
|
- 160 |
|
- 160 |
|
- !!python/tuple |
|
- 168 |
|
- 168 |
|
- !!python/tuple |
|
- 176 |
|
- 176 |
|
- !!python/tuple |
|
- 184 |
|
- 184 |
|
- !!python/tuple |
|
- 192 |
|
- 192 |
|
- !!python/tuple |
|
- 200 |
|
- 200 |
|
- !!python/tuple |
|
- 208 |
|
- 208 |
|
- !!python/tuple |
|
- 216 |
|
- 216 |
|
- !!python/tuple |
|
- 224 |
|
- 224 |
|
- !!python/tuple |
|
- 232 |
|
- 232 |
|
- !!python/tuple |
|
- 240 |
|
- 240 |
|
- !!python/tuple |
|
- 248 |
|
- 248 |
|
- !!python/tuple |
|
- 256 |
|
- 256 |
|
- !!python/tuple |
|
- 264 |
|
- 264 |
|
- !!python/tuple |
|
- 272 |
|
- 272 |
|
- !!python/tuple |
|
- 280 |
|
- 280 |
|
- !!python/tuple |
|
- 288 |
|
- 288 |
|
- !!python/tuple |
|
- 296 |
|
- 296 |
|
- !!python/tuple |
|
- 304 |
|
- 304 |
|
- !!python/tuple |
|
- 312 |
|
- 312 |
|
- !!python/tuple |
|
- 320 |
|
- 320 |
|
- !!python/tuple |
|
- 328 |
|
- 328 |
|
- !!python/tuple |
|
- 336 |
|
- 336 |
|
- !!python/tuple |
|
- 344 |
|
- 344 |
|
- !!python/tuple |
|
- 352 |
|
- 352 |
|
- !!python/tuple |
|
- 360 |
|
- 360 |
|
- !!python/tuple |
|
- 368 |
|
- 368 |
|
- !!python/tuple |
|
- 376 |
|
- 376 |
|
- !!python/tuple |
|
- 384 |
|
- 384 |
|
- !!python/tuple |
|
- 392 |
|
- 392 |
|
- !!python/tuple |
|
- 400 |
|
- 400 |
|
- !!python/tuple |
|
- 408 |
|
- 408 |
|
- !!python/tuple |
|
- 416 |
|
- 416 |
|
- !!python/tuple |
|
- 424 |
|
- 424 |
|
- !!python/tuple |
|
- 432 |
|
- 432 |
|
- !!python/tuple |
|
- 440 |
|
- 440 |
|
- !!python/tuple |
|
- 448 |
|
- 448 |
|
- !!python/tuple |
|
- 456 |
|
- 456 |
|
- !!python/tuple |
|
- 464 |
|
- 464 |
|
- !!python/tuple |
|
- 472 |
|
- 472 |
|
- !!python/tuple |
|
- 480 |
|
- 480 |
|
- !!python/tuple |
|
- 488 |
|
- 488 |
|
- !!python/tuple |
|
- 496 |
|
- 496 |
|
- !!python/tuple |
|
- 504 |
|
- 504 |
|
- !!python/tuple |
|
- 512 |
|
- 512 |
|
- !!python/tuple |
|
- 513 |
|
- 513 |
|
length_ratio_mean: 1.4462363190719616 |
|
length_ratio_stats_per_bucket: |
|
- !!python/tuple |
|
- 1.13785261776882 |
|
- 0.30617192612461125 |
|
- !!python/tuple |
|
- 1.6532001104346803 |
|
- 0.6379238313847178 |
|
- !!python/tuple |
|
- 2.471066932954014 |
|
- 0.9542933578872858 |
|
- !!python/tuple |
|
- 2.9657919123449297 |
|
- 1.5793440289847793 |
|
- !!python/tuple |
|
- 3.096168086847911 |
|
- 2.441325976657657 |
|
- !!python/tuple |
|
- 3.268248874962792 |
|
- 2.2403195790511785 |
|
- !!python/tuple |
|
- 3.453956732311208 |
|
- 2.518686961607376 |
|
- !!python/tuple |
|
- 3.2456624469723585 |
|
- 2.2619507905455962 |
|
- !!python/tuple |
|
- 3.322785538319874 |
|
- 2.1624153548695157 |
|
- !!python/tuple |
|
- 3.489342008082259 |
|
- 2.469578333618206 |
|
- !!python/tuple |
|
- 3.278259980416589 |
|
- 1.6906831932492183 |
|
- !!python/tuple |
|
- 0.23045117210041233 |
|
- 0.26501940303452104 |
|
- !!python/tuple |
|
- 0.07818400556098729 |
|
- 0.027479091254378488 |
|
- !!python/tuple |
|
- 0.05445908159715249 |
|
- 0.013332869808672716 |
|
- !!python/tuple |
|
- 0.05042016806722689 |
|
- 0.0 |
|
- !!python/tuple |
|
- 0.05658536585365853 |
|
- 0.016585365853658534 |
|
- !!python/tuple |
|
- 0.05537665840439907 |
|
- 0.021182928316626336 |
|
- !!python/tuple |
|
- 0.04568795432892477 |
|
- 0.009015856562239487 |
|
- !!python/tuple |
|
- 0.047619047619047616 |
|
- 0.0 |
|
- !!python/tuple |
|
- 0.032679738562091505 |
|
- 0.0 |
|
- &id001 !!python/tuple |
|
- null |
|
- null |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
- *id001 |
|
length_ratio_std: 0.8476478699398606 |
|
max_observed_len_source: 153 |
|
max_observed_len_target: 87 |
|
num_discarded: 0 |
|
num_sents: 779247 |
|
num_sents_per_bucket: |
|
- 478128 |
|
- 233503 |
|
- 27823 |
|
- 9243 |
|
- 4982 |
|
- 3636 |
|
- 3802 |
|
- 4709 |
|
- 5275 |
|
- 4862 |
|
- 3208 |
|
- 32 |
|
- 8 |
|
- 12 |
|
- 2 |
|
- 4 |
|
- 8 |
|
- 6 |
|
- 2 |
|
- 2 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
- 0 |
|
num_tokens_source: 5305413 |
|
num_tokens_target: 7842731 |
|
num_unks_source: 0 |
|
num_unks_target: 0 |
|
size_vocab_source: 45864 |
|
size_vocab_target: 656 |
|
eop_id: -1 |
|
max_seq_len_source: 513 |
|
max_seq_len_target: 513 |
|
num_source_factors: 1 |
|
num_target_factors: 5 |
|
config_decoder: !TransformerConfig |
|
act_type: relu |
|
attention_heads: 8 |
|
block_prepended_cross_attention: false |
|
decoder_type: transformer |
|
depth_key_value: 512 |
|
dropout_act: 0.2 |
|
dropout_attention: 0.2 |
|
dropout_prepost: 0.2 |
|
feed_forward_num_hidden: 2048 |
|
max_seq_len_source: 513 |
|
max_seq_len_target: 513 |
|
model_size: 512 |
|
num_layers: 6 |
|
positional_embedding_type: fixed |
|
postprocess_sequence: dr |
|
preprocess_sequence: n |
|
use_glu: false |
|
use_lhuc: false |
|
config_embed_source: !EmbeddingConfig |
|
allow_sparse_grad: false |
|
dropout: 0.5 |
|
factor_configs: null |
|
num_embed: 512 |
|
num_factors: 1 |
|
vocab_size: 45864 |
|
config_embed_target: !EmbeddingConfig |
|
allow_sparse_grad: false |
|
dropout: 0.5 |
|
factor_configs: |
|
- !FactorConfig |
|
combine: sum |
|
num_embed: 512 |
|
share_embedding: false |
|
vocab_size: 16 |
|
- !FactorConfig |
|
combine: sum |
|
num_embed: 512 |
|
share_embedding: false |
|
vocab_size: 24 |
|
- !FactorConfig |
|
combine: sum |
|
num_embed: 512 |
|
share_embedding: false |
|
vocab_size: 416 |
|
- !FactorConfig |
|
combine: sum |
|
num_embed: 512 |
|
share_embedding: false |
|
vocab_size: 744 |
|
num_embed: 512 |
|
num_factors: 5 |
|
vocab_size: 656 |
|
config_encoder: !TransformerConfig |
|
act_type: relu |
|
attention_heads: 8 |
|
block_prepended_cross_attention: false |
|
decoder_type: transformer |
|
depth_key_value: 512 |
|
dropout_act: 0.2 |
|
dropout_attention: 0.2 |
|
dropout_prepost: 0.2 |
|
feed_forward_num_hidden: 2048 |
|
max_seq_len_source: 513 |
|
max_seq_len_target: 513 |
|
model_size: 512 |
|
num_layers: 6 |
|
positional_embedding_type: fixed |
|
postprocess_sequence: dr |
|
preprocess_sequence: n |
|
use_glu: false |
|
use_lhuc: false |
|
config_length_task: null |
|
dtype: float32 |
|
lhuc: false |
|
neural_vocab_selection: null |
|
neural_vocab_selection_block_loss: false |
|
vocab_source_size: 45864 |
|
vocab_target_size: 656 |
|
weight_tying_type: trg_softmax |
|
|