Spaces:
Sleeping
Sleeping
Jan van Doorn
committed on
Updated model versions
Browse files
app.py
CHANGED
@@ -19,18 +19,16 @@ login(token=os.environ['hf_token'])
|
|
19 |
|
20 |
bert_atco_ner = pipeline(model='Jzuluaga/bert-base-ner-atc-en-atco2-1h')
|
21 |
|
|
|
|
|
22 |
#%%
|
23 |
def transcribe(audio_file, audio_mic, model_version):
|
24 |
-
if model_version == '
|
25 |
-
whisper =
|
26 |
-
ttl = 'Whisper Large v2 - ATCO2-ATCOSIM-ANSP'
|
27 |
-
dis = 'This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2, ATCOSIM and ANSP datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
|
28 |
-
elif model_version == 'large-v2':
|
29 |
-
whisper = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim')
|
30 |
ttl = 'Whisper Large v2 - ATCO2-ATCOSIM'
|
31 |
dis = 'This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
|
32 |
elif model_version == 'large-v3':
|
33 |
-
whisper =
|
34 |
ttl = 'Whisper Large v3 - ATCO2-ATCOSIM'
|
35 |
dis = 'This demo will transcribe ATC audio files by using the Whisper Large v3 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
|
36 |
if audio_mic is not None:
|
@@ -76,7 +74,7 @@ iface = gr.Interface(
|
|
76 |
gr.Audio(source='microphone', type='filepath'),
|
77 |
|
78 |
gr.Checkbox(label='Transcribe only', default=False),
|
79 |
-
gr.Dropdown(choices=['
|
80 |
],
|
81 |
outputs=[gr.Text(label='Transcription'), gr.Text(label='Callsigns, commands and values')],
|
82 |
title='Whisper',
|
|
|
19 |
|
20 |
bert_atco_ner = pipeline(model='Jzuluaga/bert-base-ner-atc-en-atco2-1h')
|
21 |
|
22 |
+
whisper_v2 = pipeline(model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim')
|
23 |
+
whisper_v3 = pipeline(model='jlvdoorn/whisper-large-v3-atco2-asr-atcosim')
|
24 |
#%%
|
25 |
def transcribe(audio_file, audio_mic, model_version):
|
26 |
+
if model_version == 'large-v2':
|
27 |
+
whisper = whisper_v2
|
|
|
|
|
|
|
|
|
28 |
ttl = 'Whisper Large v2 - ATCO2-ATCOSIM'
|
29 |
dis = 'This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
|
30 |
elif model_version == 'large-v3':
|
31 |
+
whisper = whisper_v3
|
32 |
ttl = 'Whisper Large v3 - ATCO2-ATCOSIM'
|
33 |
dis = 'This demo will transcribe ATC audio files by using the Whisper Large v3 model fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
|
34 |
if audio_mic is not None:
|
|
|
74 |
gr.Audio(source='microphone', type='filepath'),
|
75 |
|
76 |
gr.Checkbox(label='Transcribe only', default=False),
|
77 |
+
gr.Dropdown(choices=['large-v2', 'large-v3'], value='large-v3', label='Whisper model version'),
|
78 |
],
|
79 |
outputs=[gr.Text(label='Transcription'), gr.Text(label='Callsigns, commands and values')],
|
80 |
title='Whisper',
|