Spaces:
Sleeping
Sleeping
Jan van Doorn
committed on
Updated model versions
Browse files
app.py
CHANGED
@@ -19,18 +19,16 @@ login(token=os.environ['hf_token'])
|
|
19 |
|
20 |
bert_atco_ner = pipeline(model='Jzuluaga/bert-base-ner-atc-en-atco2-1h')
|
21 |
|
|
|
|
|
22 |
#%%
|
23 |
def transcribe(audio_file, audio_mic, model_version):
|
24 |
-
if model_version == '
|
25 |
-
whisper =
|
26 |
-
ttl = 'Whisper Large v2 - ATCO2-ATCOSIM-ANSP'
|
27 |
-
dis = 'This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2, ATCOSIM and ANSP datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
|
28 |
-
elif model_version == 'large-v2':
|
29 |
-
whisper = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim')
|
30 |
ttl = 'Whisper Large v2 - ATCO2-ATCOSIM'
|
31 |
dis = 'This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
|
32 |
elif model_version == 'large-v3':
|
33 |
-
whisper =
|
34 |
ttl = 'Whisper Large v3 - ATCO2-ATCOSIM'
|
35 |
dis = 'This demo will transcribe ATC audio files by using the Whisper Large v3 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
|
36 |
if audio_mic is not None:
|
@@ -76,7 +74,7 @@ iface = gr.Interface(
|
|
76 |
gr.Audio(source='microphone', type='filepath'),
|
77 |
|
78 |
gr.Checkbox(label='Transcribe only', default=False),
|
79 |
-
gr.Dropdown(choices=['
|
80 |
],
|
81 |
outputs=[gr.Text(label='Transcription'), gr.Text(label='Callsigns, commands and values')],
|
82 |
title='Whisper',
|
|
|
19 |
|
20 |
bert_atco_ner = pipeline(model='Jzuluaga/bert-base-ner-atc-en-atco2-1h')
|
21 |
|
22 |
+
whisper_v2 = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim')
|
23 |
+
whisper_v3 = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
|
24 |
#%%
|
25 |
def transcribe(audio_file, audio_mic, model_version):
|
26 |
+
if model_version == 'large-v2':
|
27 |
+
whisper = whisper_v2
|
|
|
|
|
|
|
|
|
28 |
ttl = 'Whisper Large v2 - ATCO2-ATCOSIM'
|
29 |
dis = 'This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
|
30 |
elif model_version == 'large-v3':
|
31 |
+
whisper = whisper_v3
|
32 |
ttl = 'Whisper Large v3 - ATCO2-ATCOSIM'
|
33 |
dis = 'This demo will transcribe ATC audio files by using the Whisper Large v3 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
|
34 |
if audio_mic is not None:
|
|
|
74 |
gr.Audio(source='microphone', type='filepath'),
|
75 |
|
76 |
gr.Checkbox(label='Transcribe only', default=False),
|
77 |
+
gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version'),
|
78 |
],
|
79 |
outputs=[gr.Text(label='Transcription'), gr.Text(label='Callsigns, commands and values')],
|
80 |
title='Whisper',
|