adriansanz committed
Commit 83d654f · verified · 1 Parent(s): c537b36

End of training

README.md ADDED
@@ -0,0 +1,134 @@
---
license: apache-2.0
base_model: projecte-aina/roberta-base-ca-v2-cased-te
tags:
- generated_from_trainer
metrics:
- accuracy
- precision
- recall
- f1
model-index:
- name: VICH_300524_epoch_1
  results: []
---

<!-- This model card has been generated automatically according to the information the Trainer had access to. You
should probably proofread and complete it, then remove this comment. -->

# VICH_300524_epoch_1

This model is a fine-tuned version of [projecte-aina/roberta-base-ca-v2-cased-te](https://huggingface.co/projecte-aina/roberta-base-ca-v2-cased-te) on an unspecified dataset.
It achieves the following results on the evaluation set:
- Loss: 0.4621
- Accuracy: 0.906
- Precision: 0.9081
- Recall: 0.9060
- F1: 0.9059
- Ratio: 0.464

## Model description

More information needed

## Intended uses & limitations

More information needed

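As a starting point while this section is incomplete, the sketch below shows one way to run premise/hypothesis inference with the Transformers `pipeline` API. The repository id `adriansanz/VICH_300524_epoch_1` and the Catalan example sentences are assumptions for illustration only.

```python
from transformers import pipeline

# Assumed Hub id for this checkpoint (user from the commit, name from the model card).
nli = pipeline("text-classification", model="adriansanz/VICH_300524_epoch_1")

# Premise / hypothesis pair (illustrative Catalan sentences, not from the training data).
result = nli({"text": "L'ajuntament obrirà les inscripcions la setmana vinent.",
              "text_pair": "Les inscripcions ja estan obertes."})
print(result)  # e.g. [{'label': 'CONTRADICTION', 'score': ...}]
```
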
## Training and evaluation data

More information needed

## Training procedure

### Training hyperparameters

The following hyperparameters were used during training (a minimal `TrainingArguments` sketch follows the list):
- learning_rate: 2e-05
- train_batch_size: 16
- eval_batch_size: 16
- seed: 47
- gradient_accumulation_steps: 2
- total_train_batch_size: 32
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
- lr_scheduler_type: linear
- lr_scheduler_warmup_ratio: 0.06
- lr_scheduler_warmup_steps: 4
- num_epochs: 1
- label_smoothing_factor: 0.1
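
The values above can be expressed as a `transformers.TrainingArguments` sketch. This is a reconstruction for readability, not the exact invocation stored in `training_args.bin`; `output_dir` is a placeholder, and the Adam betas/epsilon are left at their defaults because the listed values are the defaults.

```python
from transformers import TrainingArguments

# Hedged reconstruction of the setup described in the list above.
# output_dir is a placeholder; everything else mirrors the reported values.
training_args = TrainingArguments(
    output_dir="VICH_300524_epoch_1",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    seed=47,
    gradient_accumulation_steps=2,   # 16 * 2 = total train batch size of 32
    lr_scheduler_type="linear",
    warmup_ratio=0.06,
    warmup_steps=4,                  # when > 0 this takes precedence over warmup_ratio
    num_train_epochs=1,
    label_smoothing_factor=0.1,
)
```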

### Training results

| Training Loss | Epoch  | Step | Validation Loss | Accuracy | Precision | Recall | F1     | Ratio |
|:-------------:|:------:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|:-----:|
| 1.6339 | 0.0157 | 10  | 1.1764 | 0.608 | 0.6081 | 0.608  | 0.6079 | 0.482 |
| 0.978  | 0.0314 | 20  | 0.9387 | 0.623 | 0.6602 | 0.623  | 0.5998 | 0.741 |
| 0.8    | 0.0472 | 30  | 0.7466 | 0.68  | 0.6914 | 0.68   | 0.6752 | 0.378 |
| 0.7271 | 0.0629 | 40  | 0.6887 | 0.741 | 0.7504 | 0.741  | 0.7385 | 0.403 |
| 0.699  | 0.0786 | 50  | 0.6675 | 0.76  | 0.7676 | 0.76   | 0.7583 | 0.416 |
| 0.6985 | 0.0943 | 60  | 0.6259 | 0.779 | 0.7815 | 0.7790 | 0.7785 | 0.453 |
| 0.6617 | 0.1101 | 70  | 0.6078 | 0.796 | 0.7960 | 0.796  | 0.7960 | 0.502 |
| 0.6739 | 0.1258 | 80  | 0.6071 | 0.789 | 0.8057 | 0.789  | 0.7861 | 0.383 |
| 0.6381 | 0.1415 | 90  | 0.5997 | 0.806 | 0.8114 | 0.806  | 0.8052 | 0.566 |
| 0.6455 | 0.1572 | 100 | 0.5914 | 0.808 | 0.8102 | 0.808  | 0.8077 | 0.458 |
| 0.6816 | 0.1730 | 110 | 0.5775 | 0.822 | 0.8239 | 0.8220 | 0.8217 | 0.462 |
| 0.6346 | 0.1887 | 120 | 0.5757 | 0.82  | 0.8302 | 0.8200 | 0.8186 | 0.412 |
| 0.5773 | 0.2044 | 130 | 0.5799 | 0.823 | 0.8336 | 0.823  | 0.8216 | 0.411 |
| 0.6138 | 0.2201 | 140 | 0.5693 | 0.839 | 0.8435 | 0.839  | 0.8385 | 0.557 |
| 0.5819 | 0.2358 | 150 | 0.6291 | 0.785 | 0.8291 | 0.7850 | 0.7776 | 0.317 |
| 0.602  | 0.2516 | 160 | 0.5476 | 0.853 | 0.8531 | 0.853  | 0.8530 | 0.491 |
| 0.5285 | 0.2673 | 170 | 0.5480 | 0.856 | 0.8573 | 0.856  | 0.8559 | 0.47  |
| 0.6021 | 0.2830 | 180 | 0.5558 | 0.862 | 0.8701 | 0.8620 | 0.8612 | 0.426 |
| 0.5645 | 0.2987 | 190 | 0.5382 | 0.863 | 0.8681 | 0.863  | 0.8625 | 0.441 |
| 0.5233 | 0.3145 | 200 | 0.5309 | 0.857 | 0.8574 | 0.857  | 0.8570 | 0.517 |
| 0.5398 | 0.3302 | 210 | 0.5396 | 0.854 | 0.8585 | 0.8540 | 0.8535 | 0.444 |
| 0.5342 | 0.3459 | 220 | 0.5288 | 0.873 | 0.8731 | 0.873  | 0.8730 | 0.491 |
| 0.5428 | 0.3616 | 230 | 0.5513 | 0.862 | 0.8710 | 0.862  | 0.8612 | 0.422 |
| 0.5354 | 0.3774 | 240 | 0.5289 | 0.873 | 0.8760 | 0.873  | 0.8727 | 0.455 |
| 0.5243 | 0.3931 | 250 | 0.5202 | 0.872 | 0.8755 | 0.872  | 0.8717 | 0.452 |
| 0.5068 | 0.4088 | 260 | 0.5194 | 0.873 | 0.8776 | 0.873  | 0.8726 | 0.445 |
| 0.5256 | 0.4245 | 270 | 0.5194 | 0.869 | 0.8753 | 0.869  | 0.8684 | 0.435 |
| 0.5177 | 0.4403 | 280 | 0.5040 | 0.884 | 0.8852 | 0.884  | 0.8839 | 0.472 |
| 0.5088 | 0.4560 | 290 | 0.5037 | 0.881 | 0.8825 | 0.881  | 0.8809 | 0.469 |
| 0.5133 | 0.4717 | 300 | 0.5326 | 0.867 | 0.8774 | 0.867  | 0.8661 | 0.417 |
| 0.5078 | 0.4874 | 310 | 0.4998 | 0.885 | 0.8869 | 0.885  | 0.8849 | 0.465 |
| 0.5065 | 0.5031 | 320 | 0.4965 | 0.892 | 0.8948 | 0.892  | 0.8918 | 0.458 |
| 0.502  | 0.5189 | 330 | 0.4947 | 0.89  | 0.8920 | 0.89   | 0.8899 | 0.464 |
| 0.5119 | 0.5346 | 340 | 0.4931 | 0.889 | 0.8893 | 0.889  | 0.8890 | 0.487 |
| 0.5426 | 0.5503 | 350 | 0.5012 | 0.882 | 0.8880 | 0.8820 | 0.8815 | 0.438 |
| 0.5861 | 0.5660 | 360 | 0.4899 | 0.891 | 0.8958 | 0.891  | 0.8907 | 0.445 |
| 0.5477 | 0.5818 | 370 | 0.4839 | 0.891 | 0.8929 | 0.891  | 0.8909 | 0.465 |
| 0.5026 | 0.5975 | 380 | 0.4788 | 0.892 | 0.8920 | 0.892  | 0.8920 | 0.498 |
| 0.4688 | 0.6132 | 390 | 0.4863 | 0.902 | 0.9037 | 0.902  | 0.9019 | 0.468 |
| 0.4842 | 0.6289 | 400 | 0.4846 | 0.902 | 0.9041 | 0.9020 | 0.9019 | 0.464 |
| 0.4897 | 0.6447 | 410 | 0.4848 | 0.9   | 0.9047 | 0.9000 | 0.8997 | 0.446 |
| 0.491  | 0.6604 | 420 | 0.4742 | 0.901 | 0.9030 | 0.901  | 0.9009 | 0.465 |
| 0.4557 | 0.6761 | 430 | 0.4827 | 0.891 | 0.8910 | 0.891  | 0.8910 | 0.495 |
| 0.4858 | 0.6918 | 440 | 0.4803 | 0.896 | 0.8968 | 0.896  | 0.8959 | 0.478 |
| 0.4802 | 0.7075 | 450 | 0.4841 | 0.897 | 0.8997 | 0.897  | 0.8968 | 0.459 |
| 0.5116 | 0.7233 | 460 | 0.4761 | 0.896 | 0.8981 | 0.896  | 0.8959 | 0.464 |
| 0.5021 | 0.7390 | 470 | 0.4727 | 0.905 | 0.9060 | 0.905  | 0.9049 | 0.475 |
| 0.4986 | 0.7547 | 480 | 0.4717 | 0.903 | 0.9063 | 0.903  | 0.9028 | 0.455 |
| 0.4721 | 0.7704 | 490 | 0.4757 | 0.906 | 0.9104 | 0.9060 | 0.9057 | 0.448 |
| 0.42   | 0.7862 | 500 | 0.4705 | 0.905 | 0.9072 | 0.905  | 0.9049 | 0.463 |
| 0.4596 | 0.8019 | 510 | 0.4706 | 0.9   | 0.9011 | 0.9    | 0.8999 | 0.474 |
| 0.5301 | 0.8176 | 520 | 0.4741 | 0.903 | 0.9055 | 0.903  | 0.9029 | 0.461 |
| 0.486  | 0.8333 | 530 | 0.4731 | 0.903 | 0.9063 | 0.903  | 0.9028 | 0.455 |
| 0.4794 | 0.8491 | 540 | 0.4694 | 0.903 | 0.9055 | 0.903  | 0.9029 | 0.461 |
| 0.4848 | 0.8648 | 550 | 0.4667 | 0.906 | 0.9071 | 0.906  | 0.9059 | 0.474 |
| 0.4958 | 0.8805 | 560 | 0.4663 | 0.901 | 0.9037 | 0.901  | 0.9008 | 0.459 |
| 0.4577 | 0.8962 | 570 | 0.4675 | 0.902 | 0.9054 | 0.9020 | 0.9018 | 0.454 |
| 0.5405 | 0.9119 | 580 | 0.4637 | 0.903 | 0.9055 | 0.903  | 0.9029 | 0.461 |
| 0.4792 | 0.9277 | 590 | 0.4622 | 0.907 | 0.9090 | 0.907  | 0.9069 | 0.465 |
| 0.4419 | 0.9434 | 600 | 0.4622 | 0.908 | 0.9101 | 0.9080 | 0.9079 | 0.464 |
| 0.4685 | 0.9591 | 610 | 0.4624 | 0.906 | 0.9084 | 0.9060 | 0.9059 | 0.462 |
| 0.4794 | 0.9748 | 620 | 0.4623 | 0.904 | 0.9063 | 0.9040 | 0.9039 | 0.462 |
| 0.4673 | 0.9906 | 630 | 0.4622 | 0.905 | 0.9072 | 0.905  | 0.9049 | 0.463 |

### Framework versions

- Transformers 4.42.4
- Pytorch 2.3.1+cu121
- Datasets 2.20.0
- Tokenizers 0.19.1
config.json ADDED
@@ -0,0 +1,40 @@
{
  "_name_or_path": "projecte-aina/roberta-base-ca-v2-cased-te",
  "architectures": [
    "RobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "finetuning_task": "mnli",
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "ENTAILMENT",
    "1": "NEUTRAL",
    "2": "CONTRADICTION"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "CONTRADICTION": 2,
    "ENTAILMENT": 0,
    "NEUTRAL": 1
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.42.4",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50262
}
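
The `id2label` / `label2id` blocks above define how the classifier's three logits map to entailment labels. A minimal sketch of reading a prediction through that mapping, assuming the checkpoint is published under `adriansanz/VICH_300524_epoch_1`:

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

repo_id = "adriansanz/VICH_300524_epoch_1"  # assumed Hub id for this checkpoint
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForSequenceClassification.from_pretrained(repo_id)

# Encode an illustrative premise/hypothesis pair (not from the training data).
inputs = tokenizer("El tràmit es pot fer per internet.",
                   "El tràmit només es pot fer presencialment.",
                   return_tensors="pt")
with torch.no_grad():
    logits = model(**inputs).logits  # shape (1, 3)

pred_id = logits.argmax(dim=-1).item()
print(model.config.id2label[pred_id])  # ENTAILMENT, NEUTRAL or CONTRADICTION
```
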
logs/events.out.tfevents.1722336885.183c440d058f.2175.1 ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:4ecb58d1199d147ceac043c98e3a8b4d6646b5ce5cbe156b557def357931a2f3
size 51666
logs/events.out.tfevents.1722340752.183c440d058f.2175.2 ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:e48f3873b45ab043a2bac7a1956176dad5d73ce5a262b919c716805ea18e1ae7
size 609
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model.safetensors ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9a604ff801fb560f906d7a8363af9d371d3994454b533d3da307261b259ae871
size 498606684
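
The three lines above are a Git LFS pointer, not the weights themselves; the actual `model.safetensors` is roughly 499 MB. A small sketch of fetching the real file with `huggingface_hub`, again assuming the repo id `adriansanz/VICH_300524_epoch_1`:

```python
from huggingface_hub import hf_hub_download

# Resolves the LFS pointer and downloads the ~499 MB weights file to the local cache.
path = hf_hub_download(repo_id="adriansanz/VICH_300524_epoch_1",
                       filename="model.safetensors")
print(path)
```
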
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
{
  "bos_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "cls_token": {
    "content": "<s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "eos_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "mask_token": {
    "content": "<mask>",
    "lstrip": true,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "pad_token": {
    "content": "<pad>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "sep_token": {
    "content": "</s>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  },
  "unk_token": {
    "content": "<unk>",
    "lstrip": false,
    "normalized": true,
    "rstrip": false,
    "single_word": false
  }
}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
{
  "add_prefix_space": true,
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "4": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "errors": "replace",
  "mask_token": "<mask>",
  "max_len": 512,
  "max_length": 512,
  "model_max_length": 512,
  "pad_to_multiple_of": null,
  "pad_token": "<pad>",
  "pad_token_type_id": 0,
  "padding_side": "right",
  "sep_token": "</s>",
  "stride": 0,
  "tokenizer_class": "RobertaTokenizer",
  "trim_offsets": true,
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "<unk>"
}
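
Given these RoBERTa-style settings (`model_max_length` of 512, `add_prefix_space`, `<s>`/`</s>` special tokens), the sketch below shows how a premise/hypothesis pair is encoded for the entailment head. The repo id is again an assumed Hub path and the sentences are illustrative.

```python
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("adriansanz/VICH_300524_epoch_1")  # assumed Hub id

# Pairs are truncated jointly ("longest_first") to the 512-token limit.
enc = tok("Premissa d'exemple sobre un tràmit.", "Hipòtesi d'exemple.",
          truncation=True, max_length=tok.model_max_length)

# RoBERTa-style pair layout: <s> premise </s></s> hypothesis </s>
print(tok.convert_ids_to_tokens(enc["input_ids"]))
```
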
training_args.bin ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b0d889fd179914f6d99682defe7f718368fbc64d5df1587da3e32754915b1362
size 5176
vocab.json ADDED
The diff for this file is too large to render. See raw diff