ar5entum committed
Commit 61efb60 · verified · 1 Parent(s): d3b56a5

End of training
README.md CHANGED
@@ -1,10 +1,10 @@
 ---
 library_name: transformers
-base_model: danasone/bart-small-ru-en
+language:
+- hi
+base_model: ar5entum/bart_eng_hin_mt
 tags:
 - generated_from_trainer
-metrics:
-- bleu
 model-index:
 - name: bart_eng_hin_mt
   results: []
@@ -15,11 +15,16 @@ should probably proofread and complete it, then remove this comment. -->
 
 # bart_eng_hin_mt
 
-This model is a fine-tuned version of [danasone/bart-small-ru-en](https://huggingface.co/danasone/bart-small-ru-en) on an unknown dataset.
+This model is a fine-tuned version of [ar5entum/bart_eng_hin_mt](https://huggingface.co/ar5entum/bart_eng_hin_mt) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.3398
-- Bleu: 10.015
-- Gen Len: 123.5141
+- eval_loss: 0.5147
+- eval_model_preparation_time: 0.0051
+- eval_bleu: 11.8141
+- eval_gen_len: 122.6932
+- eval_runtime: 3.6543
+- eval_samples_per_second: 142.3
+- eval_steps_per_second: 1.642
+- step: 0
 
 ## Model description
 
@@ -38,28 +43,17 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 0.0008
-- train_batch_size: 300
-- eval_batch_size: 20
+- learning_rate: 5e-05
+- train_batch_size: 8
+- eval_batch_size: 22
 - seed: 42
 - distributed_type: multi-GPU
-- num_devices: 8
-- total_train_batch_size: 2400
-- total_eval_batch_size: 160
+- num_devices: 4
+- total_train_batch_size: 32
+- total_eval_batch_size: 88
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- lr_scheduler_warmup_steps: 1000
-- num_epochs: 15.0
-
-### Training results
-
-| Training Loss | Epoch | Step | Validation Loss | Bleu   | Gen Len  |
-|:-------------:|:-----:|:----:|:---------------:|:------:|:--------:|
-| 0.5522        | 1.0   | 689  | 0.5343          | 3.4797 | 123.3969 |
-| 0.3988        | 2.0   | 1378 | 0.4020          | 7.5644 | 123.3578 |
-| 0.3496        | 3.0   | 2067 | 0.3601          | 9.3506 | 123.4641 |
-| 0.3191        | 4.0   | 2756 | 0.3398          | 10.015 | 123.5141 |
+- num_epochs: 3.0
 
 ### Framework versions
 
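For readers of the updated card, a minimal inference sketch (not part of this commit): it assumes the published checkpoint loads through the standard transformers auto classes, and the input sentence is illustrative.

```python
# Minimal sketch, assuming the hub checkpoint loads via the transformers
# auto classes; the example input is illustrative.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

repo_id = "ar5entum/bart_eng_hin_mt"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForSeq2SeqLM.from_pretrained(repo_id)

inputs = tokenizer("How are you?", return_tensors="pt")
# max_length mirrors the 128-token truncation limit set in this commit.
outputs = model.generate(**inputs, max_length=128)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
```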
all_results.json ADDED
@@ -0,0 +1,18 @@
+{
+    "eval_bleu": 11.8141,
+    "eval_gen_len": 122.6932,
+    "eval_loss": 0.5147402286529541,
+    "eval_model_preparation_time": 0.0051,
+    "eval_runtime": 3.6543,
+    "eval_samples": 520,
+    "eval_samples_per_second": 142.3,
+    "eval_steps_per_second": 1.642,
+    "predict_bleu": 10.684,
+    "predict_gen_len": 125.2625,
+    "predict_loss": 0.5075575113296509,
+    "predict_model_preparation_time": 0.0051,
+    "predict_runtime": 13.2991,
+    "predict_samples": 2507,
+    "predict_samples_per_second": 188.508,
+    "predict_steps_per_second": 2.181
+}
eval_results.json ADDED
@@ -0,0 +1,10 @@
+{
+    "eval_bleu": 11.8141,
+    "eval_gen_len": 122.6932,
+    "eval_loss": 0.5147402286529541,
+    "eval_model_preparation_time": 0.0051,
+    "eval_runtime": 3.6543,
+    "eval_samples": 520,
+    "eval_samples_per_second": 142.3,
+    "eval_steps_per_second": 1.642
+}
generated_predictions.txt ADDED
The diff for this file is too large to render. See raw diff
 
predict_results.json ADDED
@@ -0,0 +1,10 @@
+{
+    "predict_bleu": 10.684,
+    "predict_gen_len": 125.2625,
+    "predict_loss": 0.5075575113296509,
+    "predict_model_preparation_time": 0.0051,
+    "predict_runtime": 13.2991,
+    "predict_samples": 2507,
+    "predict_samples_per_second": 188.508,
+    "predict_steps_per_second": 2.181
+}
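As a quick consistency check on the result files above, the *_samples_per_second fields equal samples divided by runtime (values taken from this commit; agreement is up to rounding).

```python
# Throughput sanity check using the numbers from the eval/predict results.
eval_sps = 520 / 3.6543      # ~142.30, matches eval_samples_per_second (142.3)
pred_sps = 2507 / 13.2991    # ~188.51, within rounding of predict_samples_per_second (188.508)
print(f"{eval_sps:.1f}, {pred_sps:.2f}")
```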
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
 {
   "version": "1.0",
-  "truncation": null,
+  "truncation": {
+    "direction": "Right",
+    "max_length": 128,
+    "strategy": "LongestFirst",
+    "stride": 0
+  },
   "padding": null,
   "added_tokens": [
     {
@@ -298,7 +303,7 @@
   },
   {
     "SpecialToken": {
-      "id": "hi_IN",
+      "id": "hi",
       "type_id": 0
     }
   }
@@ -324,7 +329,7 @@
   },
   {
     "SpecialToken": {
-      "id": "hi_IN",
+      "id": "hi",
       "type_id": 0
     }
   }
@@ -339,13 +344,13 @@
       "</s>"
     ]
   },
-  "hi_IN": {
-    "id": "hi_IN",
+  "hi": {
+    "id": "hi",
     "ids": [
-      65545
+      16960
     ],
     "tokens": [
-      "hi_IN"
+      "hi"
     ]
   }
 }
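The new "truncation" block matches what the tokenizers library serializes when truncation is enabled. A sketch of how such a change could be produced (the file path is a placeholder, not from this commit):

```python
# Sketch: enabling truncation on a fast tokenizer yields the block added
# above (direction Right, max_length 128, LongestFirst, stride 0).
from tokenizers import Tokenizer

tok = Tokenizer.from_file("tokenizer.json")  # placeholder path
tok.enable_truncation(max_length=128, stride=0,
                      strategy="longest_first", direction="right")
tok.save("tokenizer.json")
```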
tokenizer_config.json CHANGED
@@ -276,12 +276,16 @@
   "errors": "replace",
   "mask_token": "<mask>",
   "max_len": 512,
+  "max_length": 128,
   "model_max_length": 1024,
   "pad_token": "<pad>",
   "sep_token": "</s>",
-  "src_lang": "hi_IN",
-  "tgt_lang": "hi_IN",
+  "src_lang": "hi",
+  "stride": 0,
+  "tgt_lang": "hi",
   "tokenizer_class": "MBartTokenizer",
   "trim_offsets": true,
+  "truncation_side": "right",
+  "truncation_strategy": "longest_first",
   "unk_token": "<unk>"
 }
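Note that this commit moves src_lang and tgt_lang from the standard mBART code "hi_IN" to the repo's custom "hi" code (matching the special token swapped in tokenizer.json above). A sketch of how these fields surface at load time, assuming the repo's tokenizer loads cleanly:

```python
# Sketch: src_lang/tgt_lang are read from tokenizer_config.json at load time.
# "hi" is this repo's custom language code, not a standard mBART code.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("ar5entum/bart_eng_hin_mt")
print(tok.src_lang, tok.tgt_lang)  # expected: hi hi
```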
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b9ab7eba83a79e87db243724a12249417762143b24d1e6428dc9f11c29fa98ab
+oid sha256:9978ca1266b644df458f44c58f94cb2fa8701d0adf54a28412b474f8abf5599e
 size 5368