End of training

Browse files

Files changed (5) hide show

README.md +159 -0
config.json +61 -0
generation_config.json +7 -0
model.safetensors +3 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,159 @@

+---
+library_name: transformers
+license: apache-2.0
+base_model: t5-small
+tags:
+- generated_from_trainer
+model-index:
+- name: t5-small-squad-qag
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# t5-small-squad-qag
+This model is a fine-tuned version of [t5-small](https://huggingface.co/t5-small) on an unknown dataset (the dataset name was not recorded in the metadata available to the Trainer).
+It achieves the following results on the evaluation set:
+- Loss: 0.5660
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 3e-05
+- train_batch_size: 8
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 32
+- optimizer: AdamW (OptimizerNames.ADAMW_TORCH) with betas=(0.9, 0.999) and epsilon=1e-08; no additional optimizer arguments
+- lr_scheduler_type: linear
+- num_epochs: 100
+### Training results
+| Training Loss | Epoch   | Step | Validation Loss |
+|:-------------:|:-------:|:----:|:---------------:|
+| 13.5664       | 0.5714  | 1    | 14.0489         |
+| 18.6422       | 1.5714  | 2    | 13.0300         |
+| 17.8287       | 2.5714  | 3    | 12.0308         |
+| 17.0632       | 3.5714  | 4    | 11.0754         |
+| 16.1516       | 4.5714  | 5    | 10.1440         |
+| 14.6881       | 5.5714  | 6    | 9.1360          |
+| 14.226        | 6.5714  | 7    | 8.1785          |
+| 12.7129       | 7.5714  | 8    | 7.4610          |
+| 11.9365       | 8.5714  | 9    | 6.7690          |
+| 10.6449       | 9.5714  | 10   | 6.0893          |
+| 9.8716        | 10.5714 | 11   | 5.4196          |
+| 9.1578        | 11.5714 | 12   | 4.7783          |
+| 9.1422        | 12.5714 | 13   | 4.1559          |
+| 8.0235        | 13.5714 | 14   | 3.5585          |
+| 7.2521        | 14.5714 | 15   | 3.0355          |
+| 6.9468        | 15.5714 | 16   | 2.5595          |
+| 6.3573        | 16.5714 | 17   | 2.1188          |
+| 6.0266        | 17.5714 | 18   | 1.7322          |
+| 5.9024        | 18.5714 | 19   | 1.3607          |
+| 4.5874        | 19.5714 | 20   | 1.0799          |
+| 4.7103        | 20.5714 | 21   | 0.9026          |
+| 4.5861        | 21.5714 | 22   | 0.8025          |
+| 4.1161        | 22.5714 | 23   | 0.7488          |
+| 3.805         | 23.5714 | 24   | 0.7257          |
+| 3.3272        | 24.5714 | 25   | 0.7164          |
+| 3.2706        | 25.5714 | 26   | 0.7117          |
+| 3.368         | 26.5714 | 27   | 0.7095          |
+| 3.3499        | 27.5714 | 28   | 0.7062          |
+| 2.9882        | 28.5714 | 29   | 0.7011          |
+| 2.798         | 29.5714 | 30   | 0.6939          |
+| 2.7807        | 30.5714 | 31   | 0.6856          |
+| 2.6064        | 31.5714 | 32   | 0.6774          |
+| 2.537         | 32.5714 | 33   | 0.6680          |
+| 2.5005        | 33.5714 | 34   | 0.6602          |
+| 2.4666        | 34.5714 | 35   | 0.6530          |
+| 2.919         | 35.5714 | 36   | 0.6471          |
+| 2.3748        | 36.5714 | 37   | 0.6416          |
+| 2.3416        | 37.5714 | 38   | 0.6363          |
+| 2.3233        | 38.5714 | 39   | 0.6312          |
+| 2.2107        | 39.5714 | 40   | 0.6265          |
+| 2.2191        | 40.5714 | 41   | 0.6220          |
+| 2.1931        | 41.5714 | 42   | 0.6178          |
+| 2.0958        | 42.5714 | 43   | 0.6136          |
+| 1.9957        | 43.5714 | 44   | 0.6095          |
+| 2.0614        | 44.5714 | 45   | 0.6055          |
+| 2.0534        | 45.5714 | 46   | 0.6019          |
+| 2.0221        | 46.5714 | 47   | 0.5987          |
+| 1.8483        | 47.5714 | 48   | 0.5957          |
+| 1.9068        | 48.5714 | 49   | 0.5929          |
+| 1.9266        | 49.5714 | 50   | 0.5903          |
+| 1.8266        | 50.5714 | 51   | 0.5882          |
+| 1.7679        | 51.5714 | 52   | 0.5864          |
+| 1.7766        | 52.5714 | 53   | 0.5851          |
+| 1.7592        | 53.5714 | 54   | 0.5840          |
+| 1.8102        | 54.5714 | 55   | 0.5832          |
+| 1.6658        | 55.5714 | 56   | 0.5826          |
+| 1.6261        | 56.5714 | 57   | 0.5820          |
+| 1.6287        | 57.5714 | 58   | 0.5816          |
+| 1.6453        | 58.5714 | 59   | 0.5814          |
+| 1.5957        | 59.5714 | 60   | 0.5813          |
+| 1.5529        | 60.5714 | 61   | 0.5813          |
+| 1.524         | 61.5714 | 62   | 0.5814          |
+| 1.5488        | 62.5714 | 63   | 0.5815          |
+| 1.4817        | 63.5714 | 64   | 0.5817          |
+| 1.5539        | 64.5714 | 65   | 0.5820          |
+| 1.4465        | 65.5714 | 66   | 0.5821          |
+| 1.4489        | 66.5714 | 67   | 0.5820          |
+| 1.4475        | 67.5714 | 68   | 0.5820          |
+| 1.4058        | 68.5714 | 69   | 0.5819          |
+| 1.347         | 69.5714 | 70   | 0.5817          |
+| 1.42          | 70.5714 | 71   | 0.5815          |
+| 1.3531        | 71.5714 | 72   | 0.5813          |
+| 1.5894        | 72.5714 | 73   | 0.5811          |
+| 1.3858        | 73.5714 | 74   | 0.5808          |
+| 1.3335        | 74.5714 | 75   | 0.5802          |
+| 1.3272        | 75.5714 | 76   | 0.5796          |
+| 1.3593        | 76.5714 | 77   | 0.5791          |
+| 1.2942        | 77.5714 | 78   | 0.5785          |
+| 1.3547        | 78.5714 | 79   | 0.5778          |
+| 1.3148        | 79.5714 | 80   | 0.5767          |
+| 1.3101        | 80.5714 | 81   | 0.5757          |
+| 1.2415        | 81.5714 | 82   | 0.5747          |
+| 1.2511        | 82.5714 | 83   | 0.5738          |
+| 1.324         | 83.5714 | 84   | 0.5730          |
+| 1.2649        | 84.5714 | 85   | 0.5723          |
+| 1.2953        | 85.5714 | 86   | 0.5717          |
+| 1.2885        | 86.5714 | 87   | 0.5711          |
+| 1.2763        | 87.5714 | 88   | 0.5704          |
+| 1.3113        | 88.5714 | 89   | 0.5698          |
+| 1.3509        | 89.5714 | 90   | 0.5694          |
+| 1.2008        | 90.5714 | 91   | 0.5689          |
+| 1.2398        | 91.5714 | 92   | 0.5685          |
+| 1.2502        | 92.5714 | 93   | 0.5680          |
+| 1.2558        | 93.5714 | 94   | 0.5676          |
+| 1.2674        | 94.5714 | 95   | 0.5672          |
+| 1.1993        | 95.5714 | 96   | 0.5668          |
+| 1.3029        | 96.5714 | 97   | 0.5665          |
+| 1.2861        | 97.5714 | 98   | 0.5662          |
+| 1.2414        | 98.5714 | 99   | 0.5661          |
+| 1.2446        | 99.5714 | 100  | 0.5660          |
+### Framework versions
+- Transformers 4.48.3
+- Pytorch 2.5.1+cu124
+- Datasets 3.3.0
+- Tokenizers 0.21.0

config.json ADDED Viewed

	@@ -0,0 +1,61 @@

+{
+  "_name_or_path": "t5-small",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 512,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "relu",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "relu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": false,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "n_positions": 512,
+  "num_decoder_layers": 6,
+  "num_heads": 8,
+  "num_layers": 6,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 200,
+      "min_length": 30,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4,
+      "prefix": "summarize: "
+    },
+    "translation_en_to_de": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to German: "
+    },
+    "translation_en_to_fr": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to French: "
+    },
+    "translation_en_to_ro": {
+      "early_stopping": true,
+      "max_length": 300,
+      "num_beams": 4,
+      "prefix": "translate English to Romanian: "
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.3",
+  "use_cache": true,
+  "vocab_size": 32128
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.48.3"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:08ef88e143767f3315ae88f36f71844214e28d73e33245a033a92a1c5af0e106
+size 242041896

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:2167eeeed71fa9b4d341fc618662eadff6356bde117eb3d8a686c7d3116cd820
+size 5368