update layout
app.py CHANGED
@@ -6,13 +6,15 @@ from espnet2.bin.s2t_inference_language import Speech2Language
 from espnet2.bin.s2t_inference import Speech2Text
 
 
-TITLE="
+TITLE="Open Whisper-style Speech Model from CMU WAVLab"
 
 DESCRIPTION='''
 OWSM (pronounced as "awesome") is a series of Open Whisper-style Speech Models from [CMU WAVLab](https://www.wavlab.org/).
 We reproduce Whisper-style training using publicly available data and an open-source toolkit [ESPnet](https://github.com/espnet/espnet).
-For more details, please check our [website](https://www.wavlab.org/activities/2024/owsm/)
+For more details, please check our [website](https://www.wavlab.org/activities/2024/owsm/).
+'''
 
+ARTICLE = '''
 The latest demo uses OWSM v3.1 based on [E-Branchformer](https://arxiv.org/abs/2210.00077).
 OWSM v3.1 has 1.02B parameters and is trained on 180k hours of labelled data. It supports various speech-to-text tasks:
 - Speech recognition in 151 languages
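The hunk above only touches strings, but the two `espnet2` imports in its header are the whole inference stack behind this Space. A hedged sketch of how those entry points are typically used on their own; the model tag `espnet/owsm_v3.1_ebf`, the 16 kHz mono input, and the tuple layout of the results are assumptions on my part, not taken from this diff:

```python
import soundfile as sf
from espnet2.bin.s2t_inference import Speech2Text

# Assumed checkpoint name; check the OWSM website for the released model tag.
s2t = Speech2Text.from_pretrained(
    "espnet/owsm_v3.1_ebf",
    device="cpu",        # or "cuda" if available
    beam_size=5,
    lang_sym="<eng>",    # language token; OWSM uses <xxx>-style symbols
    task_sym="<asr>",    # task token; <asr> is speech recognition
)

speech, rate = sf.read("sample.wav")  # assumed to already be 16 kHz mono
results = s2t(speech)
# If the usual ESPnet hypothesis layout applies, the decoded text is the
# first element of the top hypothesis.
print(results[0][0])
```

The other import in the hunk header, `Speech2Language`, is the analogous entry point for language identification.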
@@ -24,12 +26,9 @@ OWSM v3.1 has 1.02B parameters and is trained on 180k hours of labelled data. It
 As a demo, the input speech should not exceed 2 minutes. We also limit the maximum number of tokens to be generated.
 Please try our [Colab demo](https://colab.research.google.com/drive/1zKI3ZY_OtZd6YmVeED6Cxy1QwT1mqv9O?usp=sharing) if you want to explore more features.
 
-Disclaimer
-
-Please consider citing the following related papers if you find our work helpful.
+**Disclaimer:** OWSM has not been thoroughly evaluated in all tasks. Due to limited training data, it may not perform well for certain languages.
 
-
-<p>
+Please consider citing the following papers if you find our work helpful.
 
 ```
 @inproceedings{peng2024owsm31,
@@ -45,10 +44,6 @@ Please consider citing the following related papers if you find our work helpful
 year={2023}
 }
 ```
-
-</p>
-</details>
-
 '''
 
 if not torch.cuda.is_available():
@@ -168,6 +163,7 @@ demo = gr.Interface(
     ],
     title=TITLE,
     description=DESCRIPTION,
+    article=ARTICLE,
     allow_flagging="never",
 )
 
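The layout change hinges on where Gradio renders each string: `description` appears above the input/output components and `article` below them, which is why the long notes, disclaimer, and BibTeX move out of `DESCRIPTION`. A minimal sketch of the same pattern, with a hypothetical `predict` standing in for the OWSM pipeline:

```python
import gradio as gr

TITLE = "Open Whisper-style Speech Model from CMU WAVLab"
DESCRIPTION = "Short intro rendered above the components."
ARTICLE = "Long-form notes, disclaimer, and citations rendered below the components."

def predict(audio):
    # Placeholder; the real app runs OWSM inference here.
    return "transcript"

demo = gr.Interface(
    fn=predict,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title=TITLE,              # page heading
    description=DESCRIPTION,  # shown above the inputs/outputs, supports Markdown
    article=ARTICLE,          # shown below the inputs/outputs, supports Markdown
    allow_flagging="never",
)
```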
@@ -176,5 +172,5 @@ if __name__ == "__main__":
     demo.launch(
         show_api=False,
         share=True,
-        ssr_mode=
+        ssr_mode=True,
     )
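For the launch flags, a brief annotated sketch; the `ssr_mode` comment reflects my understanding of Gradio 5's server-side rendering feature rather than anything stated in this diff:

```python
if __name__ == "__main__":
    demo.launch(
        show_api=False,  # hide the auto-generated "Use via API" page
        share=True,      # request a public share link
        ssr_mode=True,   # server-side rendering (Gradio 5); renders the first page view on the server
    )
```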