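# Sockeye model configuration, YAML-serialized from Python config objects.
# Application tags such as !ModelConfig and !TransformerConfig map to
# Sockeye's config classes, and !!python/tuple encodes Python tuples, so this
# file is meant to be read back by Sockeye's own loader; a plain
# yaml.safe_load would reject the custom tags.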
!ModelConfig
config_data: !DataConfig
data_statistics: !DataStatistics
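# Training-data statistics. average_len_target_per_bucket is the mean target
# length per length bucket (aligned with the `buckets` list below); null marks
# buckets that received no training sentences.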
average_len_target_per_bucket:
- 6.011277314861252
- 10.39671010650862
- 18.504618481112797
- 26.041977712863865
- 32.48293857888395
- 41.60258525852575
- 51.3669121514993
- 59.76513060097686
- 68.25156398104271
- 75.67976141505562
- 83.09476309226905
- 21.1875
- 7.749999999999999
- 5.833333333333333
- 6.0
- 7.0
- 7.25
- 6.333333333333333
- 7.0
- 5.0
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
- null
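# Length buckets as (source length, target length) pairs used for bucketed
# batching: multiples of 8 up to 512, plus a final (513, 513) bucket at the
# configured maximum sequence length.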
buckets:
- !!python/tuple
- 8
- 8
- !!python/tuple
- 16
- 16
- !!python/tuple
- 24
- 24
- !!python/tuple
- 32
- 32
- !!python/tuple
- 40
- 40
- !!python/tuple
- 48
- 48
- !!python/tuple
- 56
- 56
- !!python/tuple
- 64
- 64
- !!python/tuple
- 72
- 72
- !!python/tuple
- 80
- 80
- !!python/tuple
- 88
- 88
- !!python/tuple
- 96
- 96
- !!python/tuple
- 104
- 104
- !!python/tuple
- 112
- 112
- !!python/tuple
- 120
- 120
- !!python/tuple
- 128
- 128
- !!python/tuple
- 136
- 136
- !!python/tuple
- 144
- 144
- !!python/tuple
- 152
- 152
- !!python/tuple
- 160
- 160
- !!python/tuple
- 168
- 168
- !!python/tuple
- 176
- 176
- !!python/tuple
- 184
- 184
- !!python/tuple
- 192
- 192
- !!python/tuple
- 200
- 200
- !!python/tuple
- 208
- 208
- !!python/tuple
- 216
- 216
- !!python/tuple
- 224
- 224
- !!python/tuple
- 232
- 232
- !!python/tuple
- 240
- 240
- !!python/tuple
- 248
- 248
- !!python/tuple
- 256
- 256
- !!python/tuple
- 264
- 264
- !!python/tuple
- 272
- 272
- !!python/tuple
- 280
- 280
- !!python/tuple
- 288
- 288
- !!python/tuple
- 296
- 296
- !!python/tuple
- 304
- 304
- !!python/tuple
- 312
- 312
- !!python/tuple
- 320
- 320
- !!python/tuple
- 328
- 328
- !!python/tuple
- 336
- 336
- !!python/tuple
- 344
- 344
- !!python/tuple
- 352
- 352
- !!python/tuple
- 360
- 360
- !!python/tuple
- 368
- 368
- !!python/tuple
- 376
- 376
- !!python/tuple
- 384
- 384
- !!python/tuple
- 392
- 392
- !!python/tuple
- 400
- 400
- !!python/tuple
- 408
- 408
- !!python/tuple
- 416
- 416
- !!python/tuple
- 424
- 424
- !!python/tuple
- 432
- 432
- !!python/tuple
- 440
- 440
- !!python/tuple
- 448
- 448
- !!python/tuple
- 456
- 456
- !!python/tuple
- 464
- 464
- !!python/tuple
- 472
- 472
- !!python/tuple
- 480
- 480
- !!python/tuple
- 488
- 488
- !!python/tuple
- 496
- 496
- !!python/tuple
- 504
- 504
- !!python/tuple
- 512
- 512
- !!python/tuple
- 513
- 513
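# Target-to-source length-ratio statistics: a corpus-wide mean here (std
# further below), plus per-bucket (mean, std) tuples. The &id001 anchor and
# *id001 aliases are plain YAML references de-duplicating the repeated
# (null, null) entries for empty buckets.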
length_ratio_mean: 1.4462363190719616
length_ratio_stats_per_bucket:
- !!python/tuple
- 1.13785261776882
- 0.30617192612461125
- !!python/tuple
- 1.6532001104346803
- 0.6379238313847178
- !!python/tuple
- 2.471066932954014
- 0.9542933578872858
- !!python/tuple
- 2.9657919123449297
- 1.5793440289847793
- !!python/tuple
- 3.096168086847911
- 2.441325976657657
- !!python/tuple
- 3.268248874962792
- 2.2403195790511785
- !!python/tuple
- 3.453956732311208
- 2.518686961607376
- !!python/tuple
- 3.2456624469723585
- 2.2619507905455962
- !!python/tuple
- 3.322785538319874
- 2.1624153548695157
- !!python/tuple
- 3.489342008082259
- 2.469578333618206
- !!python/tuple
- 3.278259980416589
- 1.6906831932492183
- !!python/tuple
- 0.23045117210041233
- 0.26501940303452104
- !!python/tuple
- 0.07818400556098729
- 0.027479091254378488
- !!python/tuple
- 0.05445908159715249
- 0.013332869808672716
- !!python/tuple
- 0.05042016806722689
- 0.0
- !!python/tuple
- 0.05658536585365853
- 0.016585365853658534
- !!python/tuple
- 0.05537665840439907
- 0.021182928316626336
- !!python/tuple
- 0.04568795432892477
- 0.009015856562239487
- !!python/tuple
- 0.047619047619047616
- 0.0
- !!python/tuple
- 0.032679738562091505
- 0.0
- &id001 !!python/tuple
- null
- null
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
- *id001
length_ratio_std: 0.8476478699398606
max_observed_len_source: 153
max_observed_len_target: 87
num_discarded: 0
num_sents: 779247
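# Sentence-pair counts per bucket; the zeros correspond to the null entries in
# the per-bucket statistics above.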
num_sents_per_bucket:
- 478128
- 233503
- 27823
- 9243
- 4982
- 3636
- 3802
- 4709
- 5275
- 4862
- 3208
- 32
- 8
- 12
- 2
- 4
- 8
- 6
- 2
- 2
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
- 0
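# Corpus-level token and vocabulary counts; zero tokens mapped to <unk> on
# either side.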
num_tokens_source: 5305413
num_tokens_target: 7842731
num_unks_source: 0
num_unks_target: 0
size_vocab_source: 45864
size_vocab_target: 656
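# Remaining DataConfig fields. The maximum sequence lengths of 513 match the
# final bucket; the target side carries five factors (the primary token
# stream plus four extra streams). eop_id: -1 presumably means no
# end-of-prepended-text marker is used (an assumption based on Sockeye's
# prepended-source-text feature).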
eop_id: -1
max_seq_len_source: 513
max_seq_len_target: 513
num_source_factors: 1
num_target_factors: 5
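# Decoder: 6-layer transformer, model size 512, 8 attention heads, 2048-wide
# feed-forward blocks, ReLU activations, fixed (sinusoidal) positional
# embeddings, and 0.2 dropout throughout. In Sockeye's notation the
# pre/postprocess sequences "n" and "dr" denote pre-norm followed by
# dropout + residual around each sublayer.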
config_decoder: !TransformerConfig
act_type: relu
attention_heads: 8
block_prepended_cross_attention: false
decoder_type: transformer
depth_key_value: 512
dropout_act: 0.2
dropout_attention: 0.2
dropout_prepost: 0.2
feed_forward_num_hidden: 2048
max_seq_len_source: 513
max_seq_len_target: 513
model_size: 512
num_layers: 6
positional_embedding_type: fixed
postprocess_sequence: dr
preprocess_sequence: n
use_glu: false
use_lhuc: false
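# Source embeddings: a single 512-dimensional factor over a 45,864-entry
# vocabulary, with 0.5 embedding dropout.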
config_embed_source: !EmbeddingConfig
allow_sparse_grad: false
dropout: 0.5
factor_configs: null
num_embed: 512
num_factors: 1
vocab_size: 45864
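# Target embeddings: the primary 512-dimensional embedding (vocab 656) plus
# four factor embeddings (vocab sizes 16, 24, 416, 744), combined by
# summation.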
config_embed_target: !EmbeddingConfig
allow_sparse_grad: false
dropout: 0.5
factor_configs:
- !FactorConfig
combine: sum
num_embed: 512
share_embedding: false
vocab_size: 16
- !FactorConfig
combine: sum
num_embed: 512
share_embedding: false
vocab_size: 24
- !FactorConfig
combine: sum
num_embed: 512
share_embedding: false
vocab_size: 416
- !FactorConfig
combine: sum
num_embed: 512
share_embedding: false
vocab_size: 744
num_embed: 512
num_factors: 5
vocab_size: 656
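# Encoder: mirrors the decoder hyperparameters (6 layers, model size 512,
# 8 heads, 2048 feed-forward, 0.2 dropout, fixed positional embeddings).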
config_encoder: !TransformerConfig
act_type: relu
attention_heads: 8
block_prepended_cross_attention: false
decoder_type: transformer
depth_key_value: 512
dropout_act: 0.2
dropout_attention: 0.2
dropout_prepost: 0.2
feed_forward_num_hidden: 2048
max_seq_len_source: 513
max_seq_len_target: 513
model_size: 512
num_layers: 6
positional_embedding_type: fixed
postprocess_sequence: dr
preprocess_sequence: n
use_glu: false
use_lhuc: false
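# Model-level settings: float32 parameters, no auxiliary length task, no
# LHUC, no neural vocabulary selection, and weight tying between the target
# embedding and the output softmax only (trg_softmax).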
config_length_task: null
dtype: float32
lhuc: false
neural_vocab_selection: null
neural_vocab_selection_block_loss: false
vocab_source_size: 45864
vocab_target_size: 656
weight_tying_type: trg_softmax