Adzka/reward-model-distilbert-indo

Browse files

Files changed (10)

README.md +13 -9
config.json +20 -28
emissions.csv +2 -1
model.safetensors +2 -2
runs/Jul02_00-27-47_DESKTOP-HH0RPGN/events.out.tfevents.1719854873.DESKTOP-HH0RPGN.6200.0 +3 -0
special_tokens_map.json +5 -49
tokenizer.json +0 -0
tokenizer_config.json +20 -22
training_args.bin +1 -1
vocab.txt +0 -0

README.md CHANGED Viewed

@@ -1,6 +1,6 @@
 ---
 license: mit
-base_model: w11wo/indonesian-roberta-base-sentiment-classifier
 tags:
 - generated_from_trainer
 metrics:
@@ -15,10 +15,10 @@ should probably proofread and complete it, then remove this comment. -->
 # test-reward-model
-This model is a fine-tuned version of [w11wo/indonesian-roberta-base-sentiment-classifier](https://huggingface.co/w11wo/indonesian-roberta-base-sentiment-classifier) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.6974
-- Accuracy: 0.7368
 ## Model description
@@ -43,16 +43,20 @@ The following hyperparameters were used during training:
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
-- num_epochs: 5
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
-| 0.5895        | 1.25  | 50   | 0.6307          | 0.6842   |
-| 0.3559        | 2.5   | 100  | 0.6381          | 0.7105   |
-| 0.1523        | 3.75  | 150  | 0.6340          | 0.7105   |
-| 0.0694        | 5.0   | 200  | 0.6974          | 0.7368   |
 ### Framework versions

 ---
 license: mit
+base_model: cahya/distilbert-base-indonesian
 tags:
 - generated_from_trainer
 metrics:
 # test-reward-model
+This model is a fine-tuned version of [cahya/distilbert-base-indonesian](https://huggingface.co/cahya/distilbert-base-indonesian) on an unspecified dataset (the training dataset was not recorded in the model card metadata).
 It achieves the following results on the evaluation set:
+- Loss: 0.8697
+- Accuracy: 0.6316
 ## Model description
 - seed: 42
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
+- num_epochs: 10
 ### Training results
 | Training Loss | Epoch | Step | Validation Loss | Accuracy |
 |:-------------:|:-----:|:----:|:---------------:|:--------:|
+| 0.6672        | 1.25  | 50   | 0.5611          | 0.6316   |
+| 0.3793        | 2.5   | 100  | 0.6801          | 0.6579   |
+| 0.186         | 3.75  | 150  | 0.9431          | 0.6711   |
+| 0.0949        | 5.0   | 200  | 0.6777          | 0.6579   |
+| 0.0602        | 6.25  | 250  | 0.7489          | 0.6316   |
+| 0.0468        | 7.5   | 300  | 0.7410          | 0.6447   |
+| 0.0228        | 8.75  | 350  | 0.9266          | 0.6316   |
+| 0.019         | 10.0  | 400  | 0.8697          | 0.6316   |
 ### Framework versions

config.json CHANGED Viewed

@@ -1,39 +1,31 @@
 {
-  "_name_or_path": "w11wo/indonesian-roberta-base-sentiment-classifier",
   "architectures": [
-    "RobertaForSequenceClassification"
   ],
-  "attention_probs_dropout_prob": 0.1,
-  "bos_token_id": 0,
-  "classifier_dropout": null,
-  "eos_token_id": 2,
-  "gradient_checkpointing": false,
-  "hidden_act": "gelu",
-  "hidden_dropout_prob": 0.1,
-  "hidden_size": 768,
   "id2label": {
-    "0": "positive",
-    "1": "neutral",
-    "2": "negative"
   },
   "initializer_range": 0.02,
-  "intermediate_size": 3072,
   "label2id": {
-    "negative": 2,
-    "neutral": 1,
-    "positive": 0
   },
-  "layer_norm_eps": 1e-05,
-  "max_position_embeddings": 514,
-  "model_type": "roberta",
-  "num_attention_heads": 12,
-  "num_hidden_layers": 12,
-  "pad_token_id": 1,
-  "position_embedding_type": "absolute",
-  "problem_type": "single_label_classification",
   "torch_dtype": "float32",
   "transformers_version": "4.38.2",
-  "type_vocab_size": 1,
-  "use_cache": true,
-  "vocab_size": 50265
 }

 {
+  "_name_or_path": "cahya/distilbert-base-indonesian",
+  "activation": "gelu",
   "architectures": [
+    "DistilBertForSequenceClassification"
   ],
+  "attention_dropout": 0.1,
+  "dim": 768,
+  "dropout": 0.1,
+  "hidden_dim": 3072,
   "id2label": {
+    "0": "LABEL_0"
   },
   "initializer_range": 0.02,
   "label2id": {
+    "LABEL_0": 0
   },
+  "max_position_embeddings": 512,
+  "model_type": "distilbert",
+  "n_heads": 12,
+  "n_layers": 6,
+  "output_hidden_states": true,
+  "pad_token_id": 0,
+  "qa_dropout": 0.1,
+  "seq_classif_dropout": 0.2,
+  "sinusoidal_pos_embds": true,
+  "tie_weights_": true,
   "torch_dtype": "float32",
   "transformers_version": "4.38.2",
+  "vocab_size": 32000
 }

emissions.csv CHANGED Viewed

@@ -1,2 +1,3 @@
 timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
-2024-07-02T00:11:50,codecarbon,e6298b50-8854-4fb6-a92d-43999c4601c4,338.201961517334,0.010694947656611987,3.1622961642887557e-05,42.5,108.43,11.946327209472656,0.003992266833119922,0.012046153513913619,0.0011206903215284546,0.017159110668561992,Indonesia,IDN,west java,,,Windows-10-10.0.19045-SP0,3.11.5,2.2.3,8,12th Gen Intel(R) Core(TM) i3-12100F,1,1 x NVIDIA GeForce RTX 3060,107.6071,-6.9217,31.85687255859375,machine,N,1.0

 timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,gpu_energy,ram_energy,energy_consumed,country_name,country_iso_code,region,cloud_provider,cloud_region,os,python_version,codecarbon_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud,pue
+2024-07-02T00:11:50,codecarbon,e6298b50-8854-4fb6-a92d-43999c4601c4,338.201961517334,0.0106949476566119,3.162296164288756e-05,42.5,108.43,11.946327209472656,0.0039922668331199,0.0120461535139136,0.0011206903215284,0.0171591106685619,Indonesia,IDN,west java,,,Windows-10-10.0.19045-SP0,3.11.5,2.2.3,8,12th Gen Intel(R) Core(TM) i3-12100F,1,1 x NVIDIA GeForce RTX 3060,107.6071,-6.9217,31.85687255859375,machine,N,1.0
+2024-07-02T00:31:35,codecarbon,d29dc800-1005-4d6f-8e2e-8bab915a98a4,221.70085859298706,0.008340818735775607,3.762195053600684e-05,42.5,166.724,11.946327209472656,0.0026170430142018536,0.010030346762774322,0.0007347263725223773,0.013382116149498554,Indonesia,IDN,west java,,,Windows-10-10.0.19045-SP0,3.11.5,2.2.3,8,12th Gen Intel(R) Core(TM) i3-12100F,1,1 x NVIDIA GeForce RTX 3060,107.6071,-6.9217,31.85687255859375,machine,N,1.0

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a8aa724f42799c8235ac65411e2cd383b225fa2c7d71dd13060f3766ce2b58bf
-size 498615900

 version https://git-lfs.github.com/spec/v1
+oid sha256:52c8b4a93c799a67bf0b574cd480842bd90cc6ab22bc095b5dc00ec43a5dec68
+size 272369900

runs/Jul02_00-27-47_DESKTOP-HH0RPGN/events.out.tfevents.1719854873.DESKTOP-HH0RPGN.6200.0 ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7bc5c631cb7aeaedf20e7f6f491decb2eec11447ad514329c043b7c5a7613158
+size 9196

special_tokens_map.json CHANGED Viewed

@@ -1,51 +1,7 @@
 {
-  "bos_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "cls_token": {
-    "content": "<s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "eos_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "mask_token": {
-    "content": "<mask>",
-    "lstrip": true,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "pad_token": {
-    "content": "<pad>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "sep_token": {
-    "content": "</s>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  },
-  "unk_token": {
-    "content": "<unk>",
-    "lstrip": false,
-    "normalized": false,
-    "rstrip": false,
-    "single_word": false
-  }
 }

 {
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
 }

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json CHANGED Viewed

@@ -1,57 +1,55 @@
 {
-  "add_prefix_space": false,
   "added_tokens_decoder": {
     "0": {
-      "content": "<s>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "1": {
-      "content": "<pad>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "2": {
-      "content": "</s>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "3": {
-      "content": "<unk>",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
-    "4": {
-      "content": "<mask>",
-      "lstrip": true,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
-  "bos_token": "<s>",
   "clean_up_tokenization_spaces": true,
-  "cls_token": "<s>",
-  "eos_token": "</s>",
-  "errors": "replace",
-  "mask_token": "<mask>",
-  "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "<pad>",
-  "sep_token": "</s>",
-  "tokenizer_class": "RobertaTokenizer",
-  "trim_offsets": true,
-  "unk_token": "<unk>"
 }

 {
   "added_tokens_decoder": {
     "0": {
+      "content": "[PAD]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "100": {
+      "content": "[UNK]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "101": {
+      "content": "[CLS]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "102": {
+      "content": "[SEP]",
       "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     },
+    "103": {
+      "content": "[MASK]",
+      "lstrip": false,
       "normalized": false,
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "clean_up_tokenization_spaces": true,
+  "cls_token": "[CLS]",
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "BertTokenizer",
+  "unk_token": "[UNK]"
 }

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d0a0b921d70ad183817a2957b4977890a97177522e7c26660841711f33040833
 size 4920

 version https://git-lfs.github.com/spec/v1
+oid sha256:9de0ed44dc3865bb9a68a129a145f8782eecf346d387339b335acb99144c27ea
 size 4920

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff