pyf98 committed on
Commit
d49740e
1 Parent(s): 5566f4a

upgrade to v3.1

Browse files
Files changed (38) hide show
  1. README.md +2 -1
  2. app.py +9 -5
  3. owsm_v3.1_ebf/README.md +80 -0
  4. owsm_v3.1_ebf/data/token_list/bpe_unigram50000/bpe.model +3 -0
  5. owsm_v3.1_ebf/data/token_list/bpe_unigram50000/tokens.txt +0 -0
  6. owsm_v3.1_ebf/exp/s2t_stats_raw_bpe50000/train/feats_stats.npz +3 -0
  7. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/config.yaml +0 -0
  8. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/acc.png +0 -0
  9. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/backward_time.png +0 -0
  10. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/cer.png +0 -0
  11. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/cer_ctc.png +0 -0
  12. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/clip.png +0 -0
  13. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/forward_time.png +0 -0
  14. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/gpu_max_cached_mem_GB.png +0 -0
  15. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/grad_norm.png +0 -0
  16. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/iter_time.png +0 -0
  17. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/loss.png +0 -0
  18. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/loss_att.png +0 -0
  19. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/loss_ctc.png +0 -0
  20. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/loss_scale.png +0 -0
  21. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/optim0_lr0.png +0 -0
  22. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/optim_step_time.png +0 -0
  23. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/train_time.png +0 -0
  24. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/wer.png +0 -0
  25. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.1.log +0 -0
  26. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.10.log +0 -0
  27. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.13.log +0 -0
  28. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.2.log +0 -0
  29. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.3.log +0 -0
  30. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.4.log +0 -0
  31. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.5.log +0 -0
  32. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.6.log +0 -0
  33. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.7.log +0 -0
  34. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.8.log +0 -0
  35. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.9.log +0 -0
  36. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.log +0 -0
  37. owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/valid.total_count.ave_5best.till45epoch.pth +3 -0
  38. owsm_v3.1_ebf/meta.yaml +8 -0
README.md CHANGED
@@ -1,5 +1,5 @@
1
  ---
2
- title: OWSM v3 Demo
3
  emoji: 👀
4
  colorFrom: green
5
  colorTo: blue
@@ -13,6 +13,7 @@ models:
13
  - espnet/owsm_v2
14
  - espnet/owsm_v2_ebranchformer
15
  - espnet/owsm_v3
 
16
  ---
17
 
18
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: OWSM Demo
3
  emoji: 👀
4
  colorFrom: green
5
  colorTo: blue
 
13
  - espnet/owsm_v2
14
  - espnet/owsm_v2_ebranchformer
15
  - espnet/owsm_v3
16
+ - espnet/owsm_v3.1_ebf
17
  ---
18
 
19
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py CHANGED
@@ -6,20 +6,23 @@ from espnet2.bin.s2t_inference import Speech2Text
6
  from espnet2.bin.s2t_inference_language import Speech2Text as Speech2Lang
7
 
8
 
9
- TITLE="OWSM v3: An Open Whisper-style Speech Model from CMU WAVLab"
 
10
  DESCRIPTION='''
11
  OWSM is an Open Whisper-style Speech Model from [CMU WAVLab](https://www.wavlab.org/).
12
  It reproduces Whisper-style training using publicly available data and an open-source toolkit [ESPnet](https://github.com/espnet/espnet).
 
 
 
 
13
 
14
- OWSM v3 has 889M parameters and is trained on 180k hours of paired speech data. It supports various speech-to-text tasks:
15
  - Speech recognition for 151 languages
16
  - Any-to-any language speech translation
17
  - Timestamp prediction
18
  - Long-form transcription
19
  - Language identification
20
 
21
- For more details, please check out our [paper](https://arxiv.org/abs/2309.13876) (Peng et al., ASRU 2023).
22
-
23
  ```
24
  @article{peng2023owsm,
25
  title={Reproducing Whisper-Style Training Using an Open-Source Toolkit and Publicly Available Data},
@@ -38,7 +41,8 @@ Disclaimer: OWSM has not been thoroughly evaluated in all tasks. Due to limited
38
  if not torch.cuda.is_available():
39
  raise RuntimeError("Please use GPU for better speed")
40
 
41
- model_path = "owsm_v3/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till50epoch.pth"
 
42
  device = "cuda" # if torch.cuda.is_available() else "cpu"
43
 
44
  speech2text = Speech2Text.from_pretrained(
 
6
  from espnet2.bin.s2t_inference_language import Speech2Text as Speech2Lang
7
 
8
 
9
+ TITLE="OWSM: An Open Whisper-style Speech Model from CMU WAVLab"
10
+
11
  DESCRIPTION='''
12
  OWSM is an Open Whisper-style Speech Model from [CMU WAVLab](https://www.wavlab.org/).
13
  It reproduces Whisper-style training using publicly available data and an open-source toolkit [ESPnet](https://github.com/espnet/espnet).
14
+ For more details, please check out our [paper](https://arxiv.org/abs/2309.13876) (Peng et al., ASRU 2023).
15
+
16
+ OWSM v3.1 is an improved version of OWSM v3. It significantly outperforms OWSM v3 in almost all evaluation benchmarks.
17
+ We do not include any new training data. Instead, we utilize a state-of-the-art speech encoder, [E-Branchformer](https://arxiv.org/abs/2210.00077).
18
 
19
+ OWSM v3.1 has 1.02B parameters and is trained on 180k hours of paired speech data. It supports various speech-to-text tasks:
20
  - Speech recognition for 151 languages
21
  - Any-to-any language speech translation
22
  - Timestamp prediction
23
  - Long-form transcription
24
  - Language identification
25
 
 
 
26
  ```
27
  @article{peng2023owsm,
28
  title={Reproducing Whisper-Style Training Using an Open-Source Toolkit and Publicly Available Data},
 
41
  if not torch.cuda.is_available():
42
  raise RuntimeError("Please use GPU for better speed")
43
 
44
+ # model_path = "owsm_v3/exp/s2t_train_s2t_transformer_conv2d_size1024_e24_d24_lr2.5e-4_warmup10k_finetune_raw_bpe50000/valid.acc.ave_5best.till50epoch.pth"
45
+ model_path = "owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/valid.total_count.ave_5best.till45epoch.pth"
46
  device = "cuda" # if torch.cuda.is_available() else "cpu"
47
 
48
  speech2text = Speech2Text.from_pretrained(
owsm_v3.1_ebf/README.md ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - automatic-speech-recognition
6
+ - speech-translation
7
+ language: multilingual
8
+ datasets:
9
+ - owsm_v3.1
10
+ license: cc-by-4.0
11
+ ---
12
+
13
+ ## OWSM: Open Whisper-style Speech Model
14
+
15
+ [OWSM](https://arxiv.org/abs/2309.13876) is an Open Whisper-style Speech Model from [CMU WAVLab](https://www.wavlab.org/). It reproduces Whisper-style training using publicly available data and an open-source toolkit [ESPnet](https://github.com/espnet/espnet).
16
+
17
+ Our demo is available [here](https://huggingface.co/spaces/pyf98/OWSM_v3_demo).
18
+
19
+ **OWSM v3.1 is an improved version of OWSM v3. It significantly outperforms OWSM v3 in almost all evaluation benchmarks.**
20
+ We do not include any new training data. Instead, we utilize a state-of-the-art speech encoder, [E-Branchformer](https://arxiv.org/abs/2210.00077).
21
+
22
+ OWSM v3.1 has 1.02B parameters in total and is trained on 180k hours of public speech data.
23
+ Specifically, it supports the following speech-to-text tasks:
24
+ - Speech recognition
25
+ - Any-to-any-language speech translation
26
+ - Utterance-level alignment
27
+ - Long-form transcription
28
+ - Language identification
29
+
30
+
31
+ ### Citing OWSM, Branchformers and ESPnet
32
+
33
+ ```BibTex
34
+ @article{peng2023owsm,
35
+ title={Reproducing Whisper-Style Training Using an Open-Source Toolkit and Publicly Available Data},
36
+ author={Yifan Peng and Jinchuan Tian and Brian Yan and Dan Berrebbi and Xuankai Chang and Xinjian Li and Jiatong Shi and Siddhant Arora and William Chen and Roshan Sharma and Wangyou Zhang and Yui Sudo and Muhammad Shakeel and Jee-weon Jung and Soumi Maiti and Shinji Watanabe},
37
+ journal={arXiv preprint arXiv:2309.13876},
38
+ year={2023}
39
+ }
40
+ @inproceedings{peng23b_interspeech,
41
+ author={Yifan Peng and Kwangyoun Kim and Felix Wu and Brian Yan and Siddhant Arora and William Chen and Jiyang Tang and Suwon Shon and Prashant Sridhar and Shinji Watanabe},
42
+ title={{A Comparative Study on E-Branchformer vs Conformer in Speech Recognition, Translation, and Understanding Tasks}},
43
+ year=2023,
44
+ booktitle={Proc. INTERSPEECH 2023},
45
+ pages={2208--2212},
46
+ doi={10.21437/Interspeech.2023-1194}
47
+ }
48
+ @inproceedings{kim2023branchformer,
49
+ title={E-branchformer: Branchformer with enhanced merging for speech recognition},
50
+ author={Kim, Kwangyoun and Wu, Felix and Peng, Yifan and Pan, Jing and Sridhar, Prashant and Han, Kyu J and Watanabe, Shinji},
51
+ booktitle={2022 IEEE Spoken Language Technology Workshop (SLT)},
52
+ pages={84--91},
53
+ year={2023},
54
+ organization={IEEE}
55
+ }
56
+ @InProceedings{pmlr-v162-peng22a,
57
+ title = {Branchformer: Parallel {MLP}-Attention Architectures to Capture Local and Global Context for Speech Recognition and Understanding},
58
+ author = {Peng, Yifan and Dalmia, Siddharth and Lane, Ian and Watanabe, Shinji},
59
+ booktitle = {Proceedings of the 39th International Conference on Machine Learning},
60
+ pages = {17627--17643},
61
+ year = {2022},
62
+ editor = {Chaudhuri, Kamalika and Jegelka, Stefanie and Song, Le and Szepesvari, Csaba and Niu, Gang and Sabato, Sivan},
63
+ volume = {162},
64
+ series = {Proceedings of Machine Learning Research},
65
+ month = {17--23 Jul},
66
+ publisher = {PMLR},
67
+ pdf = {https://proceedings.mlr.press/v162/peng22a/peng22a.pdf},
68
+ url = {https://proceedings.mlr.press/v162/peng22a.html},
69
+ abstract = {Conformer has proven to be effective in many speech processing tasks. It combines the benefits of extracting local dependencies using convolutions and global dependencies using self-attention. Inspired by this, we propose a more flexible, interpretable and customizable encoder alternative, Branchformer, with parallel branches for modeling various ranged dependencies in end-to-end speech processing. In each encoder layer, one branch employs self-attention or its variant to capture long-range dependencies, while the other branch utilizes an MLP module with convolutional gating (cgMLP) to extract local relationships. We conduct experiments on several speech recognition and spoken language understanding benchmarks. Results show that our model outperforms both Transformer and cgMLP. It also matches with or outperforms state-of-the-art results achieved by Conformer. Furthermore, we show various strategies to reduce computation thanks to the two-branch architecture, including the ability to have variable inference complexity in a single trained model. The weights learned for merging branches indicate how local and global dependencies are utilized in different layers, which benefits model designing.}
70
+ }
71
+ @inproceedings{watanabe2018espnet,
72
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
73
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
74
+ year={2018},
75
+ booktitle={Proceedings of Interspeech},
76
+ pages={2207--2211},
77
+ doi={10.21437/Interspeech.2018-1456},
78
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
79
+ }
80
+ ```
owsm_v3.1_ebf/data/token_list/bpe_unigram50000/bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d6327da127e870bcb8c737dceb3bd47ccbce63da74ddb094f64afe313d68c8c
3
+ size 1041297
owsm_v3.1_ebf/data/token_list/bpe_unigram50000/tokens.txt ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_stats_raw_bpe50000/train/feats_stats.npz ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ef4b5e465110edf32eec024cf2427eedd677f5733bb87d6b2131e6984a6e13f
3
+ size 1402
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/config.yaml ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/acc.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/backward_time.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/cer.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/cer_ctc.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/clip.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/forward_time.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/gpu_max_cached_mem_GB.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/grad_norm.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/iter_time.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/loss.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/loss_att.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/loss_ctc.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/loss_scale.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/optim0_lr0.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/optim_step_time.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/train_time.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/images/wer.png ADDED
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.1.log ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.10.log ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.13.log ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.2.log ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.3.log ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.4.log ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.5.log ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.6.log ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.7.log ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.8.log ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.9.log ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/train.log ADDED
The diff for this file is too large to render. See raw diff
 
owsm_v3.1_ebf/exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/valid.total_count.ave_5best.till45epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfb6d34e9f03af6113ada55463d3abe26f133ff2c64e56c65419f9a469313ad3
3
+ size 4068122375
owsm_v3.1_ebf/meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202308'
2
+ files:
3
+ s2t_model_file: exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/valid.total_count.ave_5best.till45epoch.pth
4
+ python: 3.10.10 (main, Mar 21 2023, 18:45:11) [GCC 11.2.0]
5
+ timestamp: 1703273348.000399
6
+ torch: 1.13.1
7
+ yaml_files:
8
+ s2t_train_config: exp/s2t_train_s2t_ebf_conv2d_size1024_e18_d18_piecewise_lr2e-4_warmup60k_flashattn_raw_bpe50000/config.yaml