End of training

Browse files

Files changed (5) hide show

README.md +159 -0
config.json +61 -0
generation_config.json +7 -0
model.safetensors +3 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,159 @@

+---
+library_name: transformers
+license: apache-2.0
+base_model: t5-base
+tags:
+- generated_from_trainer
+model-index:
+- name: t5-base-squad-qag-c
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# t5-base-squad-qag-c
+This model is a fine-tuned version of [t5-base](https://huggingface.co/t5-base) on an unknown dataset.
+It achieves the following results on the evaluation set:
+- Loss: 0.1841
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 3e-05
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 32
+- optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9,0.999), epsilon=1e-08, and no additional optimizer arguments
+- lr_scheduler_type: linear
+- num_epochs: 100
+### Training results
+| Training Loss | Epoch   | Step | Validation Loss |
+|:-------------:|:-------:|:----:|:---------------:|
+| 15.4767       | 0.5714  | 1    | 18.0250         |
+| 20.3032       | 1.5714  | 2    | 15.4582         |
+| 18.4399       | 2.5714  | 3    | 13.1504         |
+| 17.1036       | 3.5714  | 4    | 11.2818         |
+| 15.6848       | 4.5714  | 5    | 9.9161          |
+| 13.6358       | 5.5714  | 6    | 8.8008          |
+| 11.9557       | 6.5714  | 7    | 7.8137          |
+| 11.0088       | 7.5714  | 8    | 6.8813          |
+| 9.0408        | 8.5714  | 9    | 6.0711          |
+| 7.8859        | 9.5714  | 10   | 5.3071          |
+| 6.9191        | 10.5714 | 11   | 4.8648          |
+| 6.0631        | 11.5714 | 12   | 4.8851          |
+| 4.7754        | 12.5714 | 13   | 4.9477          |
+| 4.1728        | 13.5714 | 14   | 4.9322          |
+| 3.5864        | 14.5714 | 15   | 4.5304          |
+| 2.9632        | 15.5714 | 16   | 3.4513          |
+| 2.7396        | 16.5714 | 17   | 2.1034          |
+| 2.3785        | 17.5714 | 18   | 1.1440          |
+| 2.193         | 18.5714 | 19   | 0.6816          |
+| 2.1194        | 19.5714 | 20   | 0.5207          |
+| 1.8983        | 20.5714 | 21   | 0.4698          |
+| 1.8579        | 21.5714 | 22   | 0.4504          |
+| 1.7537        | 22.5714 | 23   | 0.4418          |
+| 1.6646        | 23.5714 | 24   | 0.4355          |
+| 1.5684        | 24.5714 | 25   | 0.4285          |
+| 1.5147        | 25.5714 | 26   | 0.4223          |
+| 1.3791        | 26.5714 | 27   | 0.4167          |
+| 1.2843        | 27.5714 | 28   | 0.4123          |
+| 1.2089        | 28.5714 | 29   | 0.4094          |
+| 1.1636        | 29.5714 | 30   | 0.4085          |
+| 1.0997        | 30.5714 | 31   | 0.4075          |
+| 1.0206        | 31.5714 | 32   | 0.4064          |
+| 0.9747        | 32.5714 | 33   | 0.4038          |
+| 0.9332        | 33.5714 | 34   | 0.4009          |
+| 0.9319        | 34.5714 | 35   | 0.3970          |
+| 0.8823        | 35.5714 | 36   | 0.3917          |
+| 0.8401        | 36.5714 | 37   | 0.3856          |
+| 0.8527        | 37.5714 | 38   | 0.3770          |
+| 0.7512        | 38.5714 | 39   | 0.3655          |
+| 0.797         | 39.5714 | 40   | 0.3536          |
+| 0.765         | 40.5714 | 41   | 0.3407          |
+| 0.7556        | 41.5714 | 42   | 0.3280          |
+| 0.7198        | 42.5714 | 43   | 0.3157          |
+| 0.7115        | 43.5714 | 44   | 0.3064          |
+| 0.7074        | 44.5714 | 45   | 0.2981          |
+| 0.639         | 45.5714 | 46   | 0.2905          |
+| 0.6821        | 46.5714 | 47   | 0.2846          |
+| 0.6098        | 47.5714 | 48   | 0.2789          |
+| 0.6467        | 48.5714 | 49   | 0.2736          |
+| 0.6593        | 49.5714 | 50   | 0.2677          |
+| 0.5884        | 50.5714 | 51   | 0.2619          |
+| 0.6107        | 51.5714 | 52   | 0.2562          |
+| 0.6082        | 52.5714 | 53   | 0.2512          |
+| 0.5592        | 53.5714 | 54   | 0.2470          |
+| 0.6085        | 54.5714 | 55   | 0.2429          |
+| 0.5684        | 55.5714 | 56   | 0.2396          |
+| 0.5467        | 56.5714 | 57   | 0.2360          |
+| 0.5505        | 57.5714 | 58   | 0.2335          |
+| 0.5196        | 58.5714 | 59   | 0.2307          |
+| 0.5306        | 59.5714 | 60   | 0.2280          |
+| 0.5087        | 60.5714 | 61   | 0.2253          |
+| 0.5083        | 61.5714 | 62   | 0.2229          |
+| 0.5099        | 62.5714 | 63   | 0.2208          |
+| 0.4928        | 63.5714 | 64   | 0.2186          |
+| 0.4974        | 64.5714 | 65   | 0.2166          |
+| 0.4766        | 65.5714 | 66   | 0.2144          |
+| 0.4764        | 66.5714 | 67   | 0.2119          |
+| 0.4599        | 67.5714 | 68   | 0.2091          |
+| 0.496         | 68.5714 | 69   | 0.2066          |
+| 0.3969        | 69.5714 | 70   | 0.2042          |
+| 0.4769        | 70.5714 | 71   | 0.2018          |
+| 0.4399        | 71.5714 | 72   | 0.1997          |
+| 0.4417        | 72.5714 | 73   | 0.1977          |
+| 0.4203        | 73.5714 | 74   | 0.1958          |
+| 0.4459        | 74.5714 | 75   | 0.1942          |
+| 0.3907        | 75.5714 | 76   | 0.1927          |
+| 0.4548        | 76.5714 | 77   | 0.1917          |
+| 0.3993        | 77.5714 | 78   | 0.1908          |
+| 0.439         | 78.5714 | 79   | 0.1901          |
+| 0.4249        | 79.5714 | 80   | 0.1893          |
+| 0.4237        | 80.5714 | 81   | 0.1886          |
+| 0.4178        | 81.5714 | 82   | 0.1881          |
+| 0.4076        | 82.5714 | 83   | 0.1876          |
+| 0.4216        | 83.5714 | 84   | 0.1870          |
+| 0.3817        | 84.5714 | 85   | 0.1864          |
+| 0.3956        | 85.5714 | 86   | 0.1861          |
+| 0.4046        | 86.5714 | 87   | 0.1858          |
+| 0.3896        | 87.5714 | 88   | 0.1855          |
+| 0.3933        | 88.5714 | 89   | 0.1854          |
+| 0.4152        | 89.5714 | 90   | 0.1852          |
+| 0.3682        | 90.5714 | 91   | 0.1850          |
+| 0.4242        | 91.5714 | 92   | 0.1848          |
+| 0.3866        | 92.5714 | 93   | 0.1847          |
+| 0.3844        | 93.5714 | 94   | 0.1846          |
+| 0.3922        | 94.5714 | 95   | 0.1845          |
+| 0.3621        | 95.5714 | 96   | 0.1844          |
+| 0.3854        | 96.5714 | 97   | 0.1843          |
+| 0.3991        | 97.5714 | 98   | 0.1842          |
+| 0.3591        | 98.5714 | 99   | 0.1841          |
+| 0.3664        | 99.5714 | 100  | 0.1841          |
+### Framework versions
+- Transformers 4.48.3
+- Pytorch 2.5.1+cu124
+- Datasets 3.3.0
+- Tokenizers 0.21.0

config.json ADDED Viewed

	@@ -0,0 +1,61 @@

+{
+  "_name_or_path": "t5-base",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 3072,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.3",
+  "use_cache": true,
+  "vocab_size": 32128
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.48.3"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fe74376afb24b8ca3667d4006d2f7ad4b7f676697d0f9a3e241ae57bafaeb01f
+size 891644712

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1027cd65c9ec81bacca50340cbfd541dbad41867d4b2168b4c102f302c1f090
+size 5368