ElDestructo committed (verified)
Commit 9e8cd21 · Parent(s): b0c17ba

End of training

README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 license: apache-2.0
-base_model: google/flan-t5-base
+base_model: google/flan-t5-large
 tags:
 - generated_from_trainer
 model-index:
@@ -13,7 +13,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 # model
 
-This model is a fine-tuned version of [google/flan-t5-base](https://huggingface.co/google/flan-t5-base) on the None dataset.
+This model is a fine-tuned version of [google/flan-t5-large](https://huggingface.co/google/flan-t5-large) on the None dataset.
 
 ## Model description
 
@@ -33,12 +33,12 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0005
-- train_batch_size: 8
-- eval_batch_size: 8
+- train_batch_size: 16
+- eval_batch_size: 16
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 10
+- num_epochs: 4
 
 ### Training results
 
@@ -46,7 +46,7 @@ The following hyperparameters were used during training:
 
 ### Framework versions
 
-- Transformers 4.37.2
-- Pytorch 2.1.0+cu121
+- Transformers 4.38.1
+- Pytorch 2.2.1+cu121
 - Datasets 2.17.1
 - Tokenizers 0.15.2
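
The README's updated hyperparameters map onto the standard Hugging Face Trainer arguments. The training script itself is not part of this commit, so the sketch below only illustrates how the listed values would typically be expressed with `Seq2SeqTrainingArguments`; `output_dir` and anything else not listed in the README is an assumption.

```python
from transformers import Seq2SeqTrainingArguments

# Illustrative only: the README lists these values, but the training script is
# not in this commit, so arguments beyond the listed hyperparameters
# (e.g. output_dir) are assumptions.
training_args = Seq2SeqTrainingArguments(
    output_dir="model",              # placeholder output directory
    learning_rate=5e-4,              # learning_rate: 0.0005
    per_device_train_batch_size=16,  # train_batch_size: 16 (was 8)
    per_device_eval_batch_size=16,   # eval_batch_size: 16 (was 8)
    seed=42,                         # seed: 42
    adam_beta1=0.9,                  # optimizer: Adam with betas=(0.9, 0.999)
    adam_beta2=0.999,
    adam_epsilon=1e-8,               # epsilon: 1e-08
    lr_scheduler_type="linear",      # lr_scheduler_type: linear
    num_train_epochs=4,              # num_epochs: 4 (was 10)
)
```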
config.json CHANGED
@@ -1,12 +1,12 @@
 {
-  "_name_or_path": "google/flan-t5-base",
+  "_name_or_path": "google/flan-t5-large",
   "architectures": [
     "T5ForConditionalGeneration"
   ],
   "classifier_dropout": 0.0,
-  "d_ff": 2048,
+  "d_ff": 2816,
   "d_kv": 64,
-  "d_model": 768,
+  "d_model": 1024,
   "decoder_start_token_id": 0,
   "dense_act_fn": "gelu_new",
   "dropout_rate": 0.1,
@@ -18,45 +18,16 @@
   "layer_norm_epsilon": 1e-06,
   "model_type": "t5",
   "n_positions": 512,
-  "num_decoder_layers": 12,
-  "num_heads": 12,
-  "num_layers": 12,
+  "num_decoder_layers": 24,
+  "num_heads": 16,
+  "num_layers": 24,
   "output_past": true,
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,
   "relative_attention_num_buckets": 32,
-  "task_specific_params": {
-    "summarization": {
-      "early_stopping": true,
-      "length_penalty": 2.0,
-      "max_length": 200,
-      "min_length": 30,
-      "no_repeat_ngram_size": 3,
-      "num_beams": 4,
-      "prefix": "summarize: "
-    },
-    "translation_en_to_de": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to German: "
-    },
-    "translation_en_to_fr": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to French: "
-    },
-    "translation_en_to_ro": {
-      "early_stopping": true,
-      "max_length": 300,
-      "num_beams": 4,
-      "prefix": "translate English to Romanian: "
-    }
-  },
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
-  "transformers_version": "4.37.2",
+  "transformers_version": "4.38.1",
   "use_cache": true,
   "vocab_size": 32128
 }
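
The config.json changes are the dimension differences between the old and new base checkpoints (wider hidden and feed-forward sizes, 24 layers instead of 12, 16 heads instead of 12), plus the removal of the base model's `task_specific_params`. As a rough sanity check, one can load the new base checkpoint's published config and compare; a minimal sketch, assuming the `transformers` library is available:

```python
from transformers import AutoConfig

# Compare against the new base checkpoint named in the diff.
cfg = AutoConfig.from_pretrained("google/flan-t5-large")

assert cfg.d_model == 1024             # hidden size (flan-t5-base: 768)
assert cfg.d_ff == 2816                # feed-forward width (flan-t5-base: 2048)
assert cfg.num_layers == 24            # encoder layers (flan-t5-base: 12)
assert cfg.num_decoder_layers == 24    # decoder layers (flan-t5-base: 12)
assert cfg.num_heads == 16             # attention heads (flan-t5-base: 12)
print(cfg.model_type, cfg.vocab_size)  # "t5", 32128
```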
generation_config.json CHANGED
@@ -3,5 +3,5 @@
   "decoder_start_token_id": 0,
   "eos_token_id": 1,
   "pad_token_id": 0,
-  "transformers_version": "4.37.2"
+  "transformers_version": "4.38.1"
 }
logs/events.out.tfevents.1710315784.devi.305254.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7880a90ffb0bfa82157214111c454f7dd38901c894d02b6f3497f2394f30cf88
+size 5989
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e7986a098d3918ddf468a2e09f20d35666582f40c27dbc6090e4cd6c777aa7d2
-size 990345064
+oid sha256:8c8c68939773c650895101be528bd854196876e203346558e1203efc780f499b
+size 3132668808
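
The jump in the LFS pointer size is consistent with the larger checkpoint being stored in float32 (`"torch_dtype": "float32"` in config.json). Using approximate parameter counts of roughly 248M for flan-t5-base and 783M for flan-t5-large (ballpark figures, not taken from this commit), the arithmetic lands close to the recorded file sizes:

```python
# Back-of-the-envelope: parameters * 4 bytes (float32) should land near the
# safetensors sizes in the diff; the small remainder is file metadata.
for name, params, lfs_size in [
    ("flan-t5-base", 248_000_000, 990_345_064),
    ("flan-t5-large", 783_000_000, 3_132_668_808),
]:
    print(f"{name}: ~{params * 4 / 1e9:.2f} GB estimated vs {lfs_size / 1e9:.2f} GB in LFS")
```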
tokenizer_config.json CHANGED
@@ -930,16 +930,9 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "</s>",
   "extra_ids": 100,
-  "max_length": 7,
   "model_max_length": 512,
-  "pad_to_multiple_of": null,
   "pad_token": "<pad>",
-  "pad_token_type_id": 0,
-  "padding_side": "right",
   "sp_model_kwargs": {},
-  "stride": 0,
   "tokenizer_class": "T5Tokenizer",
-  "truncation_side": "right",
-  "truncation_strategy": "longest_first",
   "unk_token": "<unk>"
 }
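
The keys dropped from tokenizer_config.json (`max_length`, `padding_side`, `stride`, `truncation_strategy`, and so on) look like per-call padding/truncation settings that had been serialized alongside the tokenizer; that reading is an inference from the diff, not something stated in the commit. Reloading the tokenizer does not need them, since padding and truncation can be supplied at call time. A minimal sketch, using the base checkpoint's tokenizer as a stand-in for this repo:

```python
from transformers import AutoTokenizer

# Stand-in checkpoint; substitute this repo's id to load the fine-tuned tokenizer.
tok = AutoTokenizer.from_pretrained("google/flan-t5-large")

# Padding/truncation passed per call instead of being baked into tokenizer_config.json.
batch = tok(
    ["summarize: a short example input"],
    max_length=512,
    padding="longest",
    truncation=True,
    return_tensors="pt",
)
print(batch["input_ids"].shape)
```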
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7cc7887b932d1cd8a641005ba327d392d8f47b9ecb467482b336c0e93e1f1b11
-size 4856
+oid sha256:e4116cd5feb5028c4a2f3344520ae85965c5770a7e9b40090343211af8cf1ad9
+size 5048