.gitignore DELETED
@@ -1 +0,0 @@
1
- checkpoint-*/
 
 
README.md DELETED
@@ -1,96 +0,0 @@
1
- ---
2
- tags:
3
- - generated_from_trainer
4
- model-index:
5
- - name: uaspeech-foundation-fintuned
6
- results: []
7
- ---
8
-
9
- <!-- This model card has been generated automatically according to the information the Trainer had access to. You
10
- should probably proofread and complete it, then remove this comment. -->
11
-
12
- # uaspeech-foundation-fintuned
13
- Final results on the evaluation set:
14
- - Loss: 2.5324
15
- - Wer: 1.2855
16
-
17
- ## Model description
18
-
19
- More information needed
20
-
21
- ## Intended uses & limitations
22
-
23
- More information needed
24
-
25
- ## Training and evaluation data
26
-
27
- More information needed
28
-
29
- ## Training procedure
30
-
31
- ### Training hyperparameters
32
-
33
- The following hyperparameters were used during training:
34
- - learning_rate: 0.0001
35
- - train_batch_size: 4
36
- - eval_batch_size: 8
37
- - seed: 42
38
- - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
39
- - lr_scheduler_type: linear
40
- - lr_scheduler_warmup_steps: 1000
41
- - num_epochs: 30
42
-
43
- ### Training results
44
-
45
- | Training Loss | Epoch | Step | Validation Loss | Wer |
46
- |:-------------:|:-----:|:-----:|:---------------:|:------:|
47
- | 41.2984 | 0.7 | 500 | 2.8954 | 1.0 |
48
- | 3.0227 | 1.4 | 1000 | 2.8232 | 1.0042 |
49
- | 2.8283 | 2.11 | 1500 | 2.6291 | 1.0309 |
50
- | 2.5552 | 2.81 | 2000 | 2.2593 | 1.9170 |
51
- | 2.1714 | 3.51 | 2500 | 1.9586 | 1.9142 |
52
- | 1.8537 | 4.21 | 3000 | 1.5725 | 1.8579 |
53
- | 1.6087 | 4.92 | 3500 | 1.2772 | 1.7426 |
54
- | 1.3108 | 5.62 | 4000 | 1.2792 | 1.6751 |
55
- | 1.1652 | 6.32 | 4500 | 1.4565 | 1.6174 |
56
- | 1.0113 | 7.02 | 5000 | 1.1906 | 1.5626 |
57
- | 0.925 | 7.72 | 5500 | 1.4491 | 1.5260 |
58
- | 0.8183 | 8.43 | 6000 | 1.3712 | 1.5387 |
59
- | 0.7118 | 9.13 | 6500 | 1.4713 | 1.4866 |
60
- | 0.6959 | 9.83 | 7000 | 1.3336 | 1.4318 |
61
- | 0.6146 | 10.53 | 7500 | 1.3690 | 1.4177 |
62
- | 0.5655 | 11.24 | 8000 | 1.3789 | 1.4135 |
63
- | 0.4969 | 11.94 | 8500 | 1.5476 | 1.3966 |
64
- | 0.4705 | 12.64 | 9000 | 1.9062 | 1.3797 |
65
- | 0.4387 | 13.34 | 9500 | 1.2711 | 1.3924 |
66
- | 0.4115 | 14.04 | 10000 | 1.6318 | 1.3769 |
67
- | 0.3695 | 14.75 | 10500 | 1.5119 | 1.3755 |
68
- | 0.377 | 15.45 | 11000 | 1.6637 | 1.3812 |
69
- | 0.3788 | 16.15 | 11500 | 1.6636 | 1.3699 |
70
- | 0.3396 | 16.85 | 12000 | 1.6572 | 1.3418 |
71
- | 0.3047 | 17.56 | 12500 | 1.4740 | 1.3361 |
72
- | 0.2804 | 18.26 | 13000 | 2.0885 | 1.3249 |
73
- | 0.2995 | 18.96 | 13500 | 1.9536 | 1.3235 |
74
- | 0.2628 | 19.66 | 14000 | 1.7736 | 1.3179 |
75
- | 0.2703 | 20.37 | 14500 | 2.0018 | 1.3291 |
76
- | 0.2335 | 21.07 | 15000 | 1.7962 | 1.3221 |
77
- | 0.2068 | 21.77 | 15500 | 2.3187 | 1.3136 |
78
- | 0.2311 | 22.47 | 16000 | 2.4853 | 1.3291 |
79
- | 0.2491 | 23.17 | 16500 | 2.1901 | 1.3024 |
80
- | 0.1836 | 23.88 | 17000 | 2.4344 | 1.2911 |
81
- | 0.1823 | 24.58 | 17500 | 2.3705 | 1.3066 |
82
- | 0.1575 | 25.28 | 18000 | 2.1864 | 1.2897 |
83
- | 0.1451 | 25.98 | 18500 | 2.4216 | 1.2883 |
84
- | 0.1502 | 26.69 | 19000 | 2.1780 | 1.2855 |
85
- | 0.1392 | 27.39 | 19500 | 2.4009 | 1.2925 |
86
- | 0.1609 | 28.09 | 20000 | 2.4250 | 1.2982 |
87
- | 0.1066 | 28.79 | 20500 | 2.4433 | 1.2897 |
88
- | 0.1514 | 29.49 | 21000 | 2.5063 | 1.2855 |
89
-
90
-
91
- ### Framework versions
92
-
93
- - Transformers 4.23.1
94
- - Pytorch 1.12.1+cu113
95
- - Datasets 1.18.3
96
- - Tokenizers 0.13.2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
config.json DELETED
@@ -1,109 +0,0 @@
1
- {
2
- "_name_or_path": "yongjian/wav2vec2-large-a",
3
- "activation_dropout": 0.1,
4
- "adapter_kernel_size": 3,
5
- "adapter_stride": 2,
6
- "add_adapter": false,
7
- "apply_spec_augment": true,
8
- "architectures": [
9
- "Wav2Vec2ForCTC"
10
- ],
11
- "attention_dropout": 0.1,
12
- "bos_token_id": 1,
13
- "classifier_proj_size": 256,
14
- "codevector_dim": 256,
15
- "contrastive_logits_temperature": 0.1,
16
- "conv_bias": true,
17
- "conv_dim": [
18
- 512,
19
- 512,
20
- 512,
21
- 512,
22
- 512,
23
- 512,
24
- 512
25
- ],
26
- "conv_kernel": [
27
- 10,
28
- 3,
29
- 3,
30
- 3,
31
- 3,
32
- 2,
33
- 2
34
- ],
35
- "conv_stride": [
36
- 5,
37
- 2,
38
- 2,
39
- 2,
40
- 2,
41
- 2,
42
- 2
43
- ],
44
- "ctc_loss_reduction": "mean",
45
- "ctc_zero_infinity": false,
46
- "diversity_loss_weight": 0.1,
47
- "do_stable_layer_norm": true,
48
- "eos_token_id": 2,
49
- "feat_extract_activation": "gelu",
50
- "feat_extract_dropout": 0.0,
51
- "feat_extract_norm": "layer",
52
- "feat_proj_dropout": 0.1,
53
- "feat_quantizer_dropout": 0.0,
54
- "final_dropout": 0.1,
55
- "gradient_checkpointing": false,
56
- "hidden_act": "gelu",
57
- "hidden_dropout": 0.1,
58
- "hidden_dropout_prob": 0.1,
59
- "hidden_size": 1024,
60
- "initializer_range": 0.02,
61
- "intermediate_size": 4096,
62
- "layer_norm_eps": 1e-05,
63
- "layerdrop": 0.1,
64
- "mask_feature_length": 10,
65
- "mask_feature_min_masks": 0,
66
- "mask_feature_prob": 0.0,
67
- "mask_time_length": 10,
68
- "mask_time_min_masks": 2,
69
- "mask_time_prob": 0.05,
70
- "model_type": "wav2vec2",
71
- "num_adapter_layers": 3,
72
- "num_attention_heads": 16,
73
- "num_codevector_groups": 2,
74
- "num_codevectors_per_group": 320,
75
- "num_conv_pos_embedding_groups": 16,
76
- "num_conv_pos_embeddings": 128,
77
- "num_feat_extract_layers": 7,
78
- "num_hidden_layers": 24,
79
- "num_negatives": 100,
80
- "output_hidden_size": 1024,
81
- "pad_token_id": 28,
82
- "proj_codevector_dim": 256,
83
- "tdnn_dilation": [
84
- 1,
85
- 2,
86
- 3,
87
- 1,
88
- 1
89
- ],
90
- "tdnn_dim": [
91
- 512,
92
- 512,
93
- 512,
94
- 512,
95
- 1500
96
- ],
97
- "tdnn_kernel": [
98
- 5,
99
- 3,
100
- 3,
101
- 1,
102
- 1
103
- ],
104
- "torch_dtype": "float32",
105
- "transformers_version": "4.23.1",
106
- "use_weighted_layer_sum": false,
107
- "vocab_size": 32,
108
- "xvector_output_dim": 512
109
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
preprocessor_config.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "do_normalize": true,
3
- "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
- "feature_size": 1,
5
- "padding_side": "right",
6
- "padding_value": 0.0,
7
- "return_attention_mask": false,
8
- "sampling_rate": 16000
9
- }
 
 
 
 
 
 
 
 
 
 
pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a055cddc071655a160dc6b709a1977abf8361ceeec541cf426dd832b63c98cf
3
- size 1262029937
 
 
 
 
runs/Nov21_06-37-38_6696bcd44a2b/events.out.tfevents.1669021909.6696bcd44a2b.106.2 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5315c9ca1d48de5dfc596648177ab5cec6a2f0626479d5d45ee844dd481bdd8
3
- size 364
 
 
 
 
special_tokens_map.json DELETED
@@ -1,6 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
- "unk_token": "[UNK]"
6
- }
 
 
 
 
 
 
 
tokenizer_config.json DELETED
@@ -1,10 +0,0 @@
1
- {
2
- "bos_token": "<s>",
3
- "do_lower_case": false,
4
- "eos_token": "</s>",
5
- "pad_token": "[PAD]",
6
- "replace_word_delimiter_char": " ",
7
- "tokenizer_class": "Wav2Vec2CTCTokenizer",
8
- "unk_token": "[UNK]",
9
- "word_delimiter_token": "|"
10
- }
 
 
 
 
 
 
 
 
 
 
 
training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:69ef2849dd93fa9fb2aea89abb0e8f7fccb88d3568d959037d7d1022e4b8cd77
3
- size 3375
 
 
 
 
vocab.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "[PAD]": 28,
3
- "[UNK]": 27,
4
- "a": 19,
5
- "b": 24,
6
- "c": 15,
7
- "d": 21,
8
- "e": 8,
9
- "f": 12,
10
- "g": 6,
11
- "h": 5,
12
- "i": 25,
13
- "j": 1,
14
- "k": 9,
15
- "l": 18,
16
- "m": 17,
17
- "n": 2,
18
- "o": 7,
19
- "p": 11,
20
- "q": 0,
21
- "r": 14,
22
- "s": 10,
23
- "t": 4,
24
- "u": 22,
25
- "v": 20,
26
- "w": 16,
27
- "x": 26,
28
- "y": 23,
29
- "z": 3,
30
- "|": 13
31
- }