End of training

Browse files

Files changed (9) hide show

README.md +66 -0
all_results.json +9 -0
config.json +93 -0
model.safetensors +3 -0
preprocessor_config.json +9 -0
runs/Jun15_23-04-29_8b5aa78ceaf6/events.out.tfevents.1718492671.8b5aa78ceaf6.34.0 +3 -0
train_results.json +9 -0
trainer_state.json +315 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,66 @@

+---
+license: apache-2.0
+base_model: facebook/hubert-base-ls960
+tags:
+- generated_from_trainer
+metrics:
+- accuracy
+model-index:
+- name: hubert-base960-agu-amharic
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# hubert-base960-agu-amharic
+This model is a fine-tuned version of [facebook/hubert-base-ls960](https://huggingface.co/facebook/hubert-base-ls960) on an unspecified dataset (no dataset name was recorded by the Trainer).
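+It achieves the following results on the evaluation set at the final evaluation (step 3000); note that the lowest validation loss, 0.3464, was recorded earlier, at step 1500: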
+- Loss: 0.3496
+- Accuracy: 0.9297
+## Model description
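+HuBERT base model (`HubertForSpeechClassification`, mean pooling) with a single-label classification head over five labels: `01Neutral`, `02Fearful`, `03Happy`, `04Sad`, `05Angry`. More information needed.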
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
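+The dataset name was not recorded. The training split contained 12,018 samples, and the feature extractor is configured for a 16 kHz sampling rate. More information needed.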
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 16
+- eval_batch_size: 8
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
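+- num_epochs: 5 (configured; the run stopped at step 3000 of 3760, about 3.99 epochs, with an early-stopping callback of patience 3 attached)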
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss | Accuracy |
+|:-------------:|:------:|:----:|:---------------:|:--------:|
+| 0.502         | 0.6649 | 500  | 0.4584          | 0.8707   |
+| 0.3239        | 1.3298 | 1000 | 0.3637          | 0.9013   |
+| 0.2329        | 1.9947 | 1500 | 0.3464          | 0.9148   |
+| 0.1768        | 2.6596 | 2000 | 0.4056          | 0.9126   |
+| 0.1315        | 3.3245 | 2500 | 0.3803          | 0.9163   |
+| 0.1383        | 3.9894 | 3000 | 0.3496          | 0.9297   |
+### Framework versions
+- Transformers 4.41.2
+- Pytorch 2.1.2
+- Datasets 2.19.2
+- Tokenizers 0.19.1

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.9893617021276597,
+    "total_flos": 1.8714792090048768e+18,
+    "train_loss": 0.34261955579121905,
+    "train_runtime": 3211.0561,
+    "train_samples": 12018,
+    "train_samples_per_second": 18.713,
+    "train_steps_per_second": 1.171
+}

config.json ADDED Viewed

	@@ -0,0 +1,93 @@

+{
+  "_name_or_path": "facebook/hubert-base-ls960",
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "HubertForSpeechClassification"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "conv_bias": false,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "sum",
+  "ctc_zero_infinity": false,
+  "do_stable_layer_norm": false,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "group",
+  "feat_proj_dropout": 0.1,
+  "feat_proj_layer_norm": true,
+  "final_dropout": 0.1,
+  "finetuning_task": "hubert_clf",
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "01Neutral",
+    "1": "02Fearful",
+    "2": "03Happy",
+    "3": "04Sad",
+    "4": "05Angry"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "01Neutral": 0,
+    "02Fearful": 1,
+    "03Happy": 2,
+    "04Sad": 3,
+    "05Angry": 4
+  },
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "hubert",
+  "num_attention_heads": 12,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 12,
+  "pad_token_id": 0,
+  "pooling_mode": "mean",
+  "problem_type": "single_label_classification",
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.2",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 32
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c701b4386515d350e378a6b1ae65159e8e4816e3f88921da70d7c6aa3d06c486
+size 379890236

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0,
+  "return_attention_mask": false,
+  "sampling_rate": 16000
+}

runs/Jun15_23-04-29_8b5aa78ceaf6/events.out.tfevents.1718492671.8b5aa78ceaf6.34.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:957df25689a640eeb3d74e6514c2f7e63b4298d295c8b144132990841f247ee9
+size 14698

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 3.9893617021276597,
+    "total_flos": 1.8714792090048768e+18,
+    "train_loss": 0.34261955579121905,
+    "train_runtime": 3211.0561,
+    "train_samples": 12018,
+    "train_samples_per_second": 18.713,
+    "train_steps_per_second": 1.171
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,315 @@

+{
+  "best_metric": 0.3463841676712036,
+  "best_model_checkpoint": "/kaggle/working/hubert-base960-agu-amharic/checkpoint-1500",
+  "epoch": 3.9893617021276597,
+  "eval_steps": 500,
+  "global_step": 3000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.13297872340425532,
+      "grad_norm": 3.2855160236358643,
+      "learning_rate": 9.73404255319149e-06,
+      "loss": 1.4894,
+      "step": 100
+    },
+    {
+      "epoch": 0.26595744680851063,
+      "grad_norm": 14.008848190307617,
+      "learning_rate": 9.470744680851066e-06,
+      "loss": 1.1355,
+      "step": 200
+    },
+    {
+      "epoch": 0.39893617021276595,
+      "grad_norm": 5.032841205596924,
+      "learning_rate": 9.204787234042554e-06,
+      "loss": 0.7905,
+      "step": 300
+    },
+    {
+      "epoch": 0.5319148936170213,
+      "grad_norm": 6.931905746459961,
+      "learning_rate": 8.938829787234043e-06,
+      "loss": 0.6621,
+      "step": 400
+    },
+    {
+      "epoch": 0.6648936170212766,
+      "grad_norm": 5.367379188537598,
+      "learning_rate": 8.675531914893619e-06,
+      "loss": 0.502,
+      "step": 500
+    },
+    {
+      "epoch": 0.6648936170212766,
+      "eval_accuracy": 0.8707025647163391,
+      "eval_loss": 0.4583953320980072,
+      "eval_runtime": 60.5694,
+      "eval_samples_per_second": 22.09,
+      "eval_steps_per_second": 2.774,
+      "step": 500
+    },
+    {
+      "epoch": 0.7978723404255319,
+      "grad_norm": 11.638015747070312,
+      "learning_rate": 8.412234042553192e-06,
+      "loss": 0.4986,
+      "step": 600
+    },
+    {
+      "epoch": 0.9308510638297872,
+      "grad_norm": 21.74812126159668,
+      "learning_rate": 8.14627659574468e-06,
+      "loss": 0.4095,
+      "step": 700
+    },
+    {
+      "epoch": 1.0638297872340425,
+      "grad_norm": 13.848979949951172,
+      "learning_rate": 7.880319148936171e-06,
+      "loss": 0.3986,
+      "step": 800
+    },
+    {
+      "epoch": 1.196808510638298,
+      "grad_norm": 18.350629806518555,
+      "learning_rate": 7.61436170212766e-06,
+      "loss": 0.3427,
+      "step": 900
+    },
+    {
+      "epoch": 1.3297872340425532,
+      "grad_norm": 8.771578788757324,
+      "learning_rate": 7.348404255319149e-06,
+      "loss": 0.3239,
+      "step": 1000
+    },
+    {
+      "epoch": 1.3297872340425532,
+      "eval_accuracy": 0.9013453125953674,
+      "eval_loss": 0.3637482821941376,
+      "eval_runtime": 60.6579,
+      "eval_samples_per_second": 22.058,
+      "eval_steps_per_second": 2.77,
+      "step": 1000
+    },
+    {
+      "epoch": 1.4627659574468086,
+      "grad_norm": 20.235013961791992,
+      "learning_rate": 7.0824468085106394e-06,
+      "loss": 0.2807,
+      "step": 1100
+    },
+    {
+      "epoch": 1.5957446808510638,
+      "grad_norm": 2.6776235103607178,
+      "learning_rate": 6.816489361702127e-06,
+      "loss": 0.2626,
+      "step": 1200
+    },
+    {
+      "epoch": 1.728723404255319,
+      "grad_norm": 1.6725181341171265,
+      "learning_rate": 6.550531914893618e-06,
+      "loss": 0.2898,
+      "step": 1300
+    },
+    {
+      "epoch": 1.8617021276595744,
+      "grad_norm": 10.663431167602539,
+      "learning_rate": 6.284574468085107e-06,
+      "loss": 0.2627,
+      "step": 1400
+    },
+    {
+      "epoch": 1.9946808510638299,
+      "grad_norm": 34.09892654418945,
+      "learning_rate": 6.018617021276596e-06,
+      "loss": 0.2329,
+      "step": 1500
+    },
+    {
+      "epoch": 1.9946808510638299,
+      "eval_accuracy": 0.9147982001304626,
+      "eval_loss": 0.3463841676712036,
+      "eval_runtime": 60.4518,
+      "eval_samples_per_second": 22.133,
+      "eval_steps_per_second": 2.779,
+      "step": 1500
+    },
+    {
+      "epoch": 2.127659574468085,
+      "grad_norm": 0.13858869671821594,
+      "learning_rate": 5.752659574468086e-06,
+      "loss": 0.1826,
+      "step": 1600
+    },
+    {
+      "epoch": 2.2606382978723403,
+      "grad_norm": 12.44304084777832,
+      "learning_rate": 5.4867021276595745e-06,
+      "loss": 0.2282,
+      "step": 1700
+    },
+    {
+      "epoch": 2.393617021276596,
+      "grad_norm": 47.84838104248047,
+      "learning_rate": 5.220744680851064e-06,
+      "loss": 0.1477,
+      "step": 1800
+    },
+    {
+      "epoch": 2.526595744680851,
+      "grad_norm": 9.419229507446289,
+      "learning_rate": 4.954787234042554e-06,
+      "loss": 0.204,
+      "step": 1900
+    },
+    {
+      "epoch": 2.6595744680851063,
+      "grad_norm": 10.298186302185059,
+      "learning_rate": 4.6888297872340425e-06,
+      "loss": 0.1768,
+      "step": 2000
+    },
+    {
+      "epoch": 2.6595744680851063,
+      "eval_accuracy": 0.9125560522079468,
+      "eval_loss": 0.40557414293289185,
+      "eval_runtime": 60.529,
+      "eval_samples_per_second": 22.105,
+      "eval_steps_per_second": 2.776,
+      "step": 2000
+    },
+    {
+      "epoch": 2.7925531914893615,
+      "grad_norm": 8.407756805419922,
+      "learning_rate": 4.422872340425532e-06,
+      "loss": 0.1877,
+      "step": 2100
+    },
+    {
+      "epoch": 2.925531914893617,
+      "grad_norm": 0.09165091067552567,
+      "learning_rate": 4.156914893617022e-06,
+      "loss": 0.1581,
+      "step": 2200
+    },
+    {
+      "epoch": 3.0585106382978724,
+      "grad_norm": 17.642484664916992,
+      "learning_rate": 3.890957446808511e-06,
+      "loss": 0.1732,
+      "step": 2300
+    },
+    {
+      "epoch": 3.1914893617021276,
+      "grad_norm": 19.95751953125,
+      "learning_rate": 3.625e-06,
+      "loss": 0.1504,
+      "step": 2400
+    },
+    {
+      "epoch": 3.324468085106383,
+      "grad_norm": 0.0460813082754612,
+      "learning_rate": 3.3590425531914896e-06,
+      "loss": 0.1315,
+      "step": 2500
+    },
+    {
+      "epoch": 3.324468085106383,
+      "eval_accuracy": 0.9162929654121399,
+      "eval_loss": 0.38025063276290894,
+      "eval_runtime": 60.4863,
+      "eval_samples_per_second": 22.121,
+      "eval_steps_per_second": 2.777,
+      "step": 2500
+    },
+    {
+      "epoch": 3.4574468085106385,
+      "grad_norm": 0.5341168642044067,
+      "learning_rate": 3.0957446808510637e-06,
+      "loss": 0.1281,
+      "step": 2600
+    },
+    {
+      "epoch": 3.5904255319148937,
+      "grad_norm": 0.07712464034557343,
+      "learning_rate": 2.8297872340425537e-06,
+      "loss": 0.1329,
+      "step": 2700
+    },
+    {
+      "epoch": 3.723404255319149,
+      "grad_norm": 0.05446462705731392,
+      "learning_rate": 2.563829787234043e-06,
+      "loss": 0.137,
+      "step": 2800
+    },
+    {
+      "epoch": 3.8563829787234045,
+      "grad_norm": 84.07832336425781,
+      "learning_rate": 2.297872340425532e-06,
+      "loss": 0.1207,
+      "step": 2900
+    },
+    {
+      "epoch": 3.9893617021276597,
+      "grad_norm": 37.90822219848633,
+      "learning_rate": 2.0319148936170213e-06,
+      "loss": 0.1383,
+      "step": 3000
+    },
+    {
+      "epoch": 3.9893617021276597,
+      "eval_accuracy": 0.9297459125518799,
+      "eval_loss": 0.34964442253112793,
+      "eval_runtime": 60.8105,
+      "eval_samples_per_second": 22.003,
+      "eval_steps_per_second": 2.763,
+      "step": 3000
+    },
+    {
+      "epoch": 3.9893617021276597,
+      "step": 3000,
+      "total_flos": 1.8714792090048768e+18,
+      "train_loss": 0.34261955579121905,
+      "train_runtime": 3211.0561,
+      "train_samples_per_second": 18.713,
+      "train_steps_per_second": 1.171
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 3760,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.8714792090048768e+18,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9756dc229c0bf8c3e59f55a39d3df1ffcbf0389979c8dbcba83d90afeaf63d9c
+size 5176