End of training

Browse files

Files changed (10) hide show

README.md +69 -0
all_results.json +9 -0
config.json +93 -0
model.safetensors +3 -0
preprocessor_config.json +9 -0
runs/Jun15_12-00-10_4255ba7a45a2/events.out.tfevents.1718452832.4255ba7a45a2.35.0 +3 -0
runs/Jun15_12-02-40_4255ba7a45a2/events.out.tfevents.1718452968.4255ba7a45a2.35.1 +3 -0
train_results.json +9 -0
trainer_state.json +447 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,69 @@

+---
+license: apache-2.0
+base_model: facebook/hubert-large-ls960-ft
+tags:
+- generated_from_trainer
+metrics:
+- accuracy
+model-index:
+- name: hubert-agum960-amharic
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# hubert-agum960-amharic
+This model is a fine-tuned version of [facebook/hubert-large-ls960-ft](https://huggingface.co/facebook/hubert-large-ls960-ft) on an unspecified dataset (the dataset name was not recorded by the Trainer).
+It achieves the following results on the evaluation set:
+- Loss: 0.4469
+- Accuracy: 0.9260
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 1e-05
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- num_epochs: 5 (configured maximum; training stopped early at step 4500 of 15025, epoch ≈ 1.50)
+- mixed_precision_training: Native AMP
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss | Accuracy |
+|:-------------:|:------:|:----:|:---------------:|:--------:|
+| 1.0337        | 0.1664 | 500  | 0.9555          | 0.6861   |
+| 0.5792        | 0.3328 | 1000 | 0.6325          | 0.8535   |
+| 0.3656        | 0.4992 | 1500 | 0.5913          | 0.8789   |
+| 0.33          | 0.6656 | 2000 | 0.4296          | 0.9118   |
+| 0.2212        | 0.8319 | 2500 | 0.4698          | 0.9155   |
+| 0.364         | 0.9983 | 3000 | 0.4107          | 0.9133   |
+| 0.2371        | 1.1647 | 3500 | 0.5081          | 0.9118   |
+| 0.3086        | 1.3311 | 4000 | 0.4347          | 0.9275   |
+| 0.1531        | 1.4975 | 4500 | 0.4469          | 0.9260   |
+### Framework versions
+- Transformers 4.41.2
+- Pytorch 2.1.2
+- Datasets 2.19.2
+- Tokenizers 0.19.1

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 1.497504159733777,
+    "total_flos": 2.060256870141264e+18,
+    "train_loss": 0.4611078107621935,
+    "train_runtime": 3912.8249,
+    "train_samples": 12018,
+    "train_samples_per_second": 15.357,
+    "train_steps_per_second": 3.84
+}

config.json ADDED Viewed

	@@ -0,0 +1,93 @@

+{
+  "_name_or_path": "facebook/hubert-large-ls960-ft",
+  "activation_dropout": 0.1,
+  "apply_spec_augment": true,
+  "architectures": [
+    "HubertForSpeechClassification"
+  ],
+  "attention_dropout": 0.1,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "sum",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.1,
+  "feat_proj_layer_norm": true,
+  "final_dropout": 0.1,
+  "finetuning_task": "hubert_clf",
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.1,
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 1024,
+  "id2label": {
+    "0": "01Neutral",
+    "1": "02Fearful",
+    "2": "03Happy",
+    "3": "04Sad",
+    "4": "05Angry"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "label2id": {
+    "01Neutral": 0,
+    "02Fearful": 1,
+    "03Happy": 2,
+    "04Sad": 3,
+    "05Angry": 4
+  },
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.1,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.05,
+  "model_type": "hubert",
+  "num_attention_heads": 16,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 24,
+  "pad_token_id": 0,
+  "pooling_mode": "mean",
+  "problem_type": "single_label_classification",
+  "torch_dtype": "float32",
+  "transformers_version": "4.41.2",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 32
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7e4e2f75ffdee20cd0081352b3a7a2b9bfda1c2a1f2eec4dccb4cc5b3327c44a
+size 1266025756

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0,
+  "return_attention_mask": true,
+  "sampling_rate": 16000
+}

runs/Jun15_12-00-10_4255ba7a45a2/events.out.tfevents.1718452832.4255ba7a45a2.35.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:30c75af62866c501d31b85188040b9615f1a2290240953a459bf47dfb0ae6fd8
+size 6062

runs/Jun15_12-02-40_4255ba7a45a2/events.out.tfevents.1718452968.4255ba7a45a2.35.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:442a73a6de596e35ac5b29a3c3925d37f14150a0eb2237f679bebedf534cd97c
+size 18862

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 1.497504159733777,
+    "total_flos": 2.060256870141264e+18,
+    "train_loss": 0.4611078107621935,
+    "train_runtime": 3912.8249,
+    "train_samples": 12018,
+    "train_samples_per_second": 15.357,
+    "train_steps_per_second": 3.84
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,447 @@

+{
+  "best_metric": 0.4106574058532715,
+  "best_model_checkpoint": "/kaggle/working/hubert-agum960-amharic/checkpoint-3000",
+  "epoch": 1.497504159733777,
+  "eval_steps": 500,
+  "global_step": 4500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.033277870216306155,
+      "grad_norm": 3.5415408611297607,
+      "learning_rate": 9.933444259567388e-06,
+      "loss": 1.6,
+      "step": 100
+    },
+    {
+      "epoch": 0.06655574043261231,
+      "grad_norm": 3.9917523860931396,
+      "learning_rate": 9.866888519134776e-06,
+      "loss": 1.5237,
+      "step": 200
+    },
+    {
+      "epoch": 0.09983361064891846,
+      "grad_norm": null,
+      "learning_rate": 9.80099833610649e-06,
+      "loss": 1.3107,
+      "step": 300
+    },
+    {
+      "epoch": 0.13311148086522462,
+      "grad_norm": 19.04720687866211,
+      "learning_rate": 9.735108153078204e-06,
+      "loss": 1.1818,
+      "step": 400
+    },
+    {
+      "epoch": 0.16638935108153077,
+      "grad_norm": 4.309573650360107,
+      "learning_rate": 9.668552412645591e-06,
+      "loss": 1.0337,
+      "step": 500
+    },
+    {
+      "epoch": 0.16638935108153077,
+      "eval_accuracy": 0.6860986351966858,
+      "eval_loss": 0.9554809927940369,
+      "eval_runtime": 125.3513,
+      "eval_samples_per_second": 10.674,
+      "eval_steps_per_second": 2.672,
+      "step": 500
+    },
+    {
+      "epoch": 0.19966722129783693,
+      "grad_norm": 5.9266791343688965,
+      "learning_rate": 9.601996672212978e-06,
+      "loss": 0.8586,
+      "step": 600
+    },
+    {
+      "epoch": 0.23294509151414308,
+      "grad_norm": 25.942941665649414,
+      "learning_rate": 9.535440931780367e-06,
+      "loss": 0.7985,
+      "step": 700
+    },
+    {
+      "epoch": 0.26622296173044924,
+      "grad_norm": 23.755361557006836,
+      "learning_rate": 9.469550748752082e-06,
+      "loss": 0.6843,
+      "step": 800
+    },
+    {
+      "epoch": 0.2995008319467554,
+      "grad_norm": 5.648043155670166,
+      "learning_rate": 9.402995008319469e-06,
+      "loss": 0.5446,
+      "step": 900
+    },
+    {
+      "epoch": 0.33277870216306155,
+      "grad_norm": 1.2758762836456299,
+      "learning_rate": 9.337104825291182e-06,
+      "loss": 0.5792,
+      "step": 1000
+    },
+    {
+      "epoch": 0.33277870216306155,
+      "eval_accuracy": 0.853512704372406,
+      "eval_loss": 0.6325268745422363,
+      "eval_runtime": 121.8954,
+      "eval_samples_per_second": 10.977,
+      "eval_steps_per_second": 2.748,
+      "step": 1000
+    },
+    {
+      "epoch": 0.36605657237936773,
+      "grad_norm": 1.7034721374511719,
+      "learning_rate": 9.27054908485857e-06,
+      "loss": 0.4254,
+      "step": 1100
+    },
+    {
+      "epoch": 0.39933444259567386,
+      "grad_norm": 0.4905766546726227,
+      "learning_rate": 9.203993344425958e-06,
+      "loss": 0.5024,
+      "step": 1200
+    },
+    {
+      "epoch": 0.43261231281198004,
+      "grad_norm": 1.4882396459579468,
+      "learning_rate": 9.137437603993346e-06,
+      "loss": 0.4714,
+      "step": 1300
+    },
+    {
+      "epoch": 0.46589018302828616,
+      "grad_norm": 0.21027016639709473,
+      "learning_rate": 9.070881863560733e-06,
+      "loss": 0.4184,
+      "step": 1400
+    },
+    {
+      "epoch": 0.49916805324459235,
+      "grad_norm": 0.40890273451805115,
+      "learning_rate": 9.004326123128122e-06,
+      "loss": 0.3656,
+      "step": 1500
+    },
+    {
+      "epoch": 0.49916805324459235,
+      "eval_accuracy": 0.878923773765564,
+      "eval_loss": 0.5912803411483765,
+      "eval_runtime": 121.7902,
+      "eval_samples_per_second": 10.986,
+      "eval_steps_per_second": 2.751,
+      "step": 1500
+    },
+    {
+      "epoch": 0.5324459234608985,
+      "grad_norm": 27.935806274414062,
+      "learning_rate": 8.937770382695509e-06,
+      "loss": 0.4205,
+      "step": 1600
+    },
+    {
+      "epoch": 0.5657237936772047,
+      "grad_norm": 13.320003509521484,
+      "learning_rate": 8.871880199667222e-06,
+      "loss": 0.3586,
+      "step": 1700
+    },
+    {
+      "epoch": 0.5990016638935108,
+      "grad_norm": 1.6185659170150757,
+      "learning_rate": 8.80532445923461e-06,
+      "loss": 0.3782,
+      "step": 1800
+    },
+    {
+      "epoch": 0.632279534109817,
+      "grad_norm": 1.0840280055999756,
+      "learning_rate": 8.738768718801997e-06,
+      "loss": 0.3585,
+      "step": 1900
+    },
+    {
+      "epoch": 0.6655574043261231,
+      "grad_norm": 0.10571889579296112,
+      "learning_rate": 8.672212978369386e-06,
+      "loss": 0.33,
+      "step": 2000
+    },
+    {
+      "epoch": 0.6655574043261231,
+      "eval_accuracy": 0.9118086695671082,
+      "eval_loss": 0.4296289384365082,
+      "eval_runtime": 121.9384,
+      "eval_samples_per_second": 10.973,
+      "eval_steps_per_second": 2.747,
+      "step": 2000
+    },
+    {
+      "epoch": 0.6988352745424293,
+      "grad_norm": 12.672430038452148,
+      "learning_rate": 8.605657237936773e-06,
+      "loss": 0.4664,
+      "step": 2100
+    },
+    {
+      "epoch": 0.7321131447587355,
+      "grad_norm": 0.23700322210788727,
+      "learning_rate": 8.539101497504162e-06,
+      "loss": 0.364,
+      "step": 2200
+    },
+    {
+      "epoch": 0.7653910149750416,
+      "grad_norm": 0.07070857286453247,
+      "learning_rate": 8.472545757071549e-06,
+      "loss": 0.3883,
+      "step": 2300
+    },
+    {
+      "epoch": 0.7986688851913477,
+      "grad_norm": 0.022693803533911705,
+      "learning_rate": 8.405990016638937e-06,
+      "loss": 0.365,
+      "step": 2400
+    },
+    {
+      "epoch": 0.831946755407654,
+      "grad_norm": 46.31725311279297,
+      "learning_rate": 8.339434276206322e-06,
+      "loss": 0.2212,
+      "step": 2500
+    },
+    {
+      "epoch": 0.831946755407654,
+      "eval_accuracy": 0.9155455827713013,
+      "eval_loss": 0.4698057472705841,
+      "eval_runtime": 122.1469,
+      "eval_samples_per_second": 10.954,
+      "eval_steps_per_second": 2.743,
+      "step": 2500
+    },
+    {
+      "epoch": 0.8652246256239601,
+      "grad_norm": 0.03843013569712639,
+      "learning_rate": 8.272878535773711e-06,
+      "loss": 0.2667,
+      "step": 2600
+    },
+    {
+      "epoch": 0.8985024958402662,
+      "grad_norm": 0.8355897068977356,
+      "learning_rate": 8.206322795341098e-06,
+      "loss": 0.2647,
+      "step": 2700
+    },
+    {
+      "epoch": 0.9317803660565723,
+      "grad_norm": 71.40643310546875,
+      "learning_rate": 8.139767054908487e-06,
+      "loss": 0.2689,
+      "step": 2800
+    },
+    {
+      "epoch": 0.9650582362728786,
+      "grad_norm": 0.15802563726902008,
+      "learning_rate": 8.073211314475874e-06,
+      "loss": 0.2999,
+      "step": 2900
+    },
+    {
+      "epoch": 0.9983361064891847,
+      "grad_norm": 0.03220776841044426,
+      "learning_rate": 8.006655574043262e-06,
+      "loss": 0.364,
+      "step": 3000
+    },
+    {
+      "epoch": 0.9983361064891847,
+      "eval_accuracy": 0.9133034348487854,
+      "eval_loss": 0.4106574058532715,
+      "eval_runtime": 121.8932,
+      "eval_samples_per_second": 10.977,
+      "eval_steps_per_second": 2.748,
+      "step": 3000
+    },
+    {
+      "epoch": 1.0316139767054908,
+      "grad_norm": 14.846766471862793,
+      "learning_rate": 7.940099833610649e-06,
+      "loss": 0.2743,
+      "step": 3100
+    },
+    {
+      "epoch": 1.064891846921797,
+      "grad_norm": 5.4475579261779785,
+      "learning_rate": 7.873544093178038e-06,
+      "loss": 0.2405,
+      "step": 3200
+    },
+    {
+      "epoch": 1.098169717138103,
+      "grad_norm": 0.051481593400239944,
+      "learning_rate": 7.806988352745425e-06,
+      "loss": 0.2283,
+      "step": 3300
+    },
+    {
+      "epoch": 1.1314475873544092,
+      "grad_norm": 0.10480177402496338,
+      "learning_rate": 7.740432612312813e-06,
+      "loss": 0.2049,
+      "step": 3400
+    },
+    {
+      "epoch": 1.1647254575707155,
+      "grad_norm": 0.2395573854446411,
+      "learning_rate": 7.6738768718802e-06,
+      "loss": 0.2371,
+      "step": 3500
+    },
+    {
+      "epoch": 1.1647254575707155,
+      "eval_accuracy": 0.9118086695671082,
+      "eval_loss": 0.5081421732902527,
+      "eval_runtime": 122.1747,
+      "eval_samples_per_second": 10.952,
+      "eval_steps_per_second": 2.742,
+      "step": 3500
+    },
+    {
+      "epoch": 1.1980033277870217,
+      "grad_norm": 0.08652403950691223,
+      "learning_rate": 7.607321131447588e-06,
+      "loss": 0.1947,
+      "step": 3600
+    },
+    {
+      "epoch": 1.2312811980033278,
+      "grad_norm": 0.01758364401757717,
+      "learning_rate": 7.540765391014976e-06,
+      "loss": 0.2981,
+      "step": 3700
+    },
+    {
+      "epoch": 1.264559068219634,
+      "grad_norm": 0.023226574063301086,
+      "learning_rate": 7.4742096505823635e-06,
+      "loss": 0.2315,
+      "step": 3800
+    },
+    {
+      "epoch": 1.29783693843594,
+      "grad_norm": 2.320756673812866,
+      "learning_rate": 7.407653910149751e-06,
+      "loss": 0.1545,
+      "step": 3900
+    },
+    {
+      "epoch": 1.3311148086522462,
+      "grad_norm": 0.02829299308359623,
+      "learning_rate": 7.341098169717139e-06,
+      "loss": 0.3086,
+      "step": 4000
+    },
+    {
+      "epoch": 1.3311148086522462,
+      "eval_accuracy": 0.927503764629364,
+      "eval_loss": 0.43469786643981934,
+      "eval_runtime": 122.0442,
+      "eval_samples_per_second": 10.963,
+      "eval_steps_per_second": 2.745,
+      "step": 4000
+    },
+    {
+      "epoch": 1.3643926788685525,
+      "grad_norm": 0.563818097114563,
+      "learning_rate": 7.274542429284527e-06,
+      "loss": 0.203,
+      "step": 4100
+    },
+    {
+      "epoch": 1.3976705490848587,
+      "grad_norm": 0.032793425023555756,
+      "learning_rate": 7.207986688851915e-06,
+      "loss": 0.2317,
+      "step": 4200
+    },
+    {
+      "epoch": 1.4309484193011648,
+      "grad_norm": 13.284598350524902,
+      "learning_rate": 7.1414309484193024e-06,
+      "loss": 0.1721,
+      "step": 4300
+    },
+    {
+      "epoch": 1.464226289517471,
+      "grad_norm": 7.5517897605896,
+      "learning_rate": 7.0748752079866885e-06,
+      "loss": 0.2042,
+      "step": 4400
+    },
+    {
+      "epoch": 1.497504159733777,
+      "grad_norm": 0.04595191031694412,
+      "learning_rate": 7.008319467554076e-06,
+      "loss": 0.1531,
+      "step": 4500
+    },
+    {
+      "epoch": 1.497504159733777,
+      "eval_accuracy": 0.926008939743042,
+      "eval_loss": 0.4468592703342438,
+      "eval_runtime": 122.6586,
+      "eval_samples_per_second": 10.908,
+      "eval_steps_per_second": 2.731,
+      "step": 4500
+    },
+    {
+      "epoch": 1.497504159733777,
+      "step": 4500,
+      "total_flos": 2.060256870141264e+18,
+      "train_loss": 0.4611078107621935,
+      "train_runtime": 3912.8249,
+      "train_samples_per_second": 15.357,
+      "train_steps_per_second": 3.84
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 15025,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "EarlyStoppingCallback": {
+      "args": {
+        "early_stopping_patience": 3,
+        "early_stopping_threshold": 0.0
+      },
+      "attributes": {
+        "early_stopping_patience_counter": 0
+      }
+    },
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2.060256870141264e+18,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c2e39203d964af082866179c6a0ff14c4c5db0bbec38e0af23ba7e07899d988c
+size 5176