ShaunThayil committed
Commit 570cefa · 1 Parent(s): d6d9e10

ShaunThayil/roberta_1

Files changed (4):
  1. README.md +20 -28
  2. config.json +20 -17
  3. pytorch_model.bin +2 -2
  4. training_args.bin +1 -1
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
-license: apache-2.0
-base_model: distilbert-base-uncased
+license: mit
+base_model: roberta-base
 tags:
 - generated_from_trainer
 metrics:
@@ -18,13 +18,13 @@ should probably proofread and complete it, then remove this comment. -->
 
 # training-1
 
-This model is a fine-tuned version of [distilbert-base-uncased](https://huggingface.co/distilbert-base-uncased) on an unknown dataset.
+This model is a fine-tuned version of [roberta-base](https://huggingface.co/roberta-base) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.0292
-- Accuracy: 0.9940
-- Precision: 0.9982
-- Recall: 0.9893
-- F1: 0.9937
+- Loss: 0.0448
+- Accuracy: 0.9937
+- Precision: 0.9912
+- Recall: 0.9859
+- F1: 0.9885
 
 ## Model description
 
@@ -43,7 +43,7 @@ More information needed
 ### Training hyperparameters
 
 The following hyperparameters were used during training:
-- learning_rate: 2e-05
+- learning_rate: 1e-05
 - train_batch_size: 8
 - eval_batch_size: 8
 - seed: 42
@@ -55,29 +55,21 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss | Accuracy | Precision | Recall | F1 |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|:---------:|:------:|:------:|
-| No log | 0.25 | 85 | 0.0345 | 0.9931 | 0.9982 | 0.9875 | 0.9928 |
-| No log | 0.5 | 170 | 0.0428 | 0.9905 | 1.0 | 0.9804 | 0.9901 |
-| No log | 0.75 | 255 | 0.0295 | 0.9940 | 0.9982 | 0.9893 | 0.9937 |
-| 0.0811 | 1.0 | 340 | 0.0237 | 0.9957 | 1.0 | 0.9911 | 0.9955 |
-| 0.0811 | 1.25 | 425 | 0.0618 | 0.9897 | 1.0 | 0.9786 | 0.9892 |
-| 0.0811 | 1.5 | 510 | 0.0338 | 0.9940 | 1.0 | 0.9875 | 0.9937 |
-| 0.0811 | 1.76 | 595 | 0.0373 | 0.9931 | 1.0 | 0.9857 | 0.9928 |
-| 0.0267 | 2.01 | 680 | 0.0382 | 0.9923 | 0.9982 | 0.9857 | 0.9919 |
-| 0.0267 | 2.26 | 765 | 0.0271 | 0.9948 | 1.0 | 0.9893 | 0.9946 |
-| 0.0267 | 2.51 | 850 | 0.0355 | 0.9940 | 1.0 | 0.9875 | 0.9937 |
-| 0.0267 | 2.76 | 935 | 0.0397 | 0.9940 | 1.0 | 0.9875 | 0.9937 |
-| 0.0187 | 3.01 | 1020 | 0.0270 | 0.9940 | 0.9982 | 0.9893 | 0.9937 |
-| 0.0187 | 3.26 | 1105 | 0.0246 | 0.9948 | 0.9982 | 0.9911 | 0.9946 |
-| 0.0187 | 3.51 | 1190 | 0.0340 | 0.9940 | 1.0 | 0.9875 | 0.9937 |
-| 0.0187 | 3.76 | 1275 | 0.0242 | 0.9957 | 1.0 | 0.9911 | 0.9955 |
-| 0.0093 | 4.01 | 1360 | 0.0224 | 0.9948 | 0.9982 | 0.9911 | 0.9946 |
-| 0.0093 | 4.26 | 1445 | 0.0275 | 0.9940 | 0.9982 | 0.9893 | 0.9937 |
-| 0.0093 | 4.51 | 1530 | 0.0285 | 0.9940 | 0.9982 | 0.9893 | 0.9937 |
-| 0.0093 | 4.76 | 1615 | 0.0292 | 0.9940 | 0.9982 | 0.9893 | 0.9937 |
+| No log | 0.5 | 302 | 0.0546 | 0.9870 | 0.9737 | 0.9789 | 0.9763 |
+| No log | 1.0 | 604 | 0.0511 | 0.9913 | 0.9911 | 0.9771 | 0.9840 |
+| 0.1032 | 1.5 | 906 | 0.0558 | 0.9899 | 0.9807 | 0.9824 | 0.9815 |
+| 0.1032 | 2.0 | 1208 | 0.0467 | 0.9928 | 0.9982 | 0.9754 | 0.9866 |
+| 0.0353 | 2.5 | 1510 | 0.0411 | 0.9937 | 0.9929 | 0.9842 | 0.9885 |
+| 0.0353 | 3.0 | 1812 | 0.0460 | 0.9932 | 0.9911 | 0.9842 | 0.9876 |
+| 0.0183 | 3.49 | 2114 | 0.0423 | 0.9937 | 0.9947 | 0.9824 | 0.9885 |
+| 0.0183 | 3.99 | 2416 | 0.0476 | 0.9932 | 0.9911 | 0.9842 | 0.9876 |
+| 0.013 | 4.49 | 2718 | 0.0463 | 0.9932 | 0.9911 | 0.9842 | 0.9876 |
+| 0.013 | 4.99 | 3020 | 0.0448 | 0.9937 | 0.9912 | 0.9859 | 0.9885 |
 
 
 ### Framework versions
 
 - Transformers 4.33.1
 - Pytorch 2.2.0.dev20230913+cu121
+- Datasets 2.14.5
 - Tokenizers 0.13.3
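As a quick consistency check on the updated evaluation metrics, F1 should be the harmonic mean of the reported precision and recall; a minimal sketch using the values from the new model card:

```python
# Sanity-check the new eval metrics: F1 is the harmonic mean of
# precision and recall, so the three reported values should agree.
precision = 0.9912  # reported eval precision
recall = 0.9859     # reported eval recall

f1 = 2 * precision * recall / (precision + recall)
print(round(f1, 4))  # matches the reported F1 of 0.9885
```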
config.json CHANGED
@@ -1,25 +1,28 @@
 {
-  "_name_or_path": "distilbert-base-uncased",
-  "activation": "gelu",
+  "_name_or_path": "roberta-base",
   "architectures": [
-    "DistilBertForSequenceClassification"
+    "RobertaForSequenceClassification"
   ],
-  "attention_dropout": 0.1,
-  "dim": 768,
-  "dropout": 0.1,
-  "hidden_dim": 3072,
+  "attention_probs_dropout_prob": 0.1,
+  "bos_token_id": 0,
+  "classifier_dropout": null,
+  "eos_token_id": 2,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
   "initializer_range": 0.02,
-  "max_position_embeddings": 512,
-  "model_type": "distilbert",
-  "n_heads": 12,
-  "n_layers": 6,
-  "pad_token_id": 0,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 514,
+  "model_type": "roberta",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "position_embedding_type": "absolute",
   "problem_type": "single_label_classification",
-  "qa_dropout": 0.1,
-  "seq_classif_dropout": 0.2,
-  "sinusoidal_pos_embds": false,
-  "tie_weights_": true,
   "torch_dtype": "float32",
   "transformers_version": "4.33.1",
-  "vocab_size": 30522
+  "type_vocab_size": 1,
+  "use_cache": true,
+  "vocab_size": 50265
 }
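For readability, the post-commit config.json can be reassembled from the added and unchanged lines of the hunk above; this sketch parses it to confirm the result is valid JSON (key order and array formatting are assumptions):

```python
import json

# The config.json after this commit, assembled from the "+" and context
# lines of the diff above (key order and indentation assumed).
new_config = json.loads("""
{
  "_name_or_path": "roberta-base",
  "architectures": ["RobertaForSequenceClassification"],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "transformers_version": "4.33.1",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}
""")

# RoBERTa reserves extra positions for padding offsets, hence 514 vs. BERT's 512.
print(new_config["model_type"], len(new_config))
```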
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7acd063f486d2307b8db8225437c8fba5d71f6c8b7dc261b5ffad01be0373b61
-size 267855978
+oid sha256:ecdb9897e7900218afded54579bf06bd86ee0a57e867f6c8b3378d768f4fe0d2
+size 498658094
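Both .bin entries are Git LFS pointer files: the repository itself stores only a version line, an oid, and a size, while the actual weights live in LFS object storage. A minimal sketch of parsing the new pointer (the jump to ~499 MB is consistent with roberta-base being a larger model than distilbert-base-uncased; the "4 bytes per float32 parameter" note is an inference, not stated in the diff):

```python
# Parse the new Git LFS pointer for pytorch_model.bin. Git stores only
# these three lines; the weight file lives in LFS object storage.
pointer = """\
version https://git-lfs.github.com/spec/v1
oid sha256:ecdb9897e7900218afded54579bf06bd86ee0a57e867f6c8b3378d768f4fe0d2
size 498658094
"""

fields = dict(line.split(" ", 1) for line in pointer.splitlines())
algo, digest = fields["oid"].split(":", 1)

print(algo, len(digest))          # sha256 digest, 64 hex chars
print(int(fields["size"]) / 1e6)  # ~498.7 MB (roughly 4 bytes per float32 param)
```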
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6805874f93e682a9cec9dc0ea8f1e836bb07d9630cff35857cf4f9d1163e2f2d
+oid sha256:715c5e974e827a8a9120a03d7ad08bfa526ed63cbd16636e26b5ebb3ea3582fb
 size 4472