Spaces:
Running
Running
File size: 3,072 Bytes
ab2b897 49c643d bffc737 49c643d fd41627 db11291 125f0d6 2ce74f8 125f0d6 d100b4b 125f0d6 2ce74f8 f091ddf bffc737 ab2b897 f091ddf bffc737 ab2b897 f091ddf bffc737 f091ddf bffc737 ab2b897 659ddf4 f091ddf bffc737 ab2b897 bffc737 f091ddf 659ddf4 f091ddf 2ce74f8 bffc737 ab2b897 e81b0e9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
#%%
from huggingface_hub import login
from transformers import pipeline
import gradio as gr
import os
# Authenticate against the Hugging Face Hub. The access token is read from the
# `hf_token` environment variable; a KeyError is raised if it is not set.
login(token=os.environ['hf_token'])

## Try to load a local model if available
# try:
#     whisper = pipeline(model='/mnt/projects/whisper/WhisperANSP/Models/whisper-large-v2-atco2-asr-atcosim-ANSP-3h1m', task='automatic-speech-recognition')
#     ttl = 'Whisper Large v2 - ATCO2-ATCOSIM-ANSP'
#     dis = 'This demo will transcribe ATC audio files by using the Whisper Large v2 model fine-tuned on the ATCO2, ATCOSIM and ANSP datasets. \n \n Further it uses a Named Entity Recognition model to extract callsigns, commands and values from the transcription. \n This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
# except:

# Speech model used by transcribe(); the task is left for `pipeline` to infer
# from the checkpoint.
whisper = pipeline(
    model='jlvdoorn/whisper-large-v2-atco2-asr-atcosim',
)

# Title and description shown in the Gradio interface.
ttl = 'Whisper Large v2 - ATCO2-ATCOSIM'
dis = (
    'This demo will transcribe ATC audio files by using the Whisper Large v2 model '
    'fine-tuned on the ATCO2 and ATCOSIM datasets. \n \n '
    'Further it uses a Named Entity Recognition model to extract callsigns, commands '
    'and values from the transcription. \n '
    'This model is based on Google\'s BERT model and fine-tuned on the ATCO2 dataset.'
)

# Named Entity Recognition model used by extractCallSignCommand().
bert_atco_ner = pipeline(
    model='Jzuluaga/bert-base-ner-atc-en-atco2-1h',
)
#%%
def transcribe(audio_file, audio_mic):
    """Transcribe one of the two audio inputs with the Whisper pipeline.

    The microphone recording takes precedence; the uploaded file is only
    used when no microphone recording is present.

    Args:
        audio_file: Uploaded audio input, or None when nothing was uploaded.
        audio_mic: Microphone audio input, or None when nothing was recorded.

    Returns:
        The 'text' field of the Whisper result, or a fixed notice when both
        inputs are None.
    """
    # Pick the source first: microphone wins over upload.
    chosen_audio = audio_mic if audio_mic is not None else audio_file

    if chosen_audio is None:
        return 'There was no audio to transcribe...'

    return whisper(chosen_audio)['text']
#%%
def extractCallSignCommand(transcription):
    """Run the NER pipeline on a transcription and summarise what it found.

    Args:
        transcription: Text to analyse. Any value that is not a string is
            treated as "no transcription available".

    Returns:
        A three-line summary of the form
        'Callsigns: ...\\nCommands: ...\\nValues: ...' where each list is
        comma-separated, or a fixed notice when `transcription` is not a
        string.
    """
    # Guard clause: isinstance (rather than an exact type comparison) also
    # accepts str subclasses.
    if not isinstance(transcription, str):
        return 'There was no transcription to extract a callsign or command from...'

    # One bucket per category. A token's word is added to every category whose
    # name occurs in its entity label, so the checks stay independent.
    words_by_category = {'callsign': [], 'command': [], 'value': []}
    for item in bert_atco_ner(transcription):
        entity_label = item['entity']
        for category, words in words_by_category.items():
            if category in entity_label:
                words.append(item['word'])

    return (
        'Callsigns: ' + ', '.join(words_by_category['callsign'])
        + '\nCommands: ' + ', '.join(words_by_category['command'])
        + '\nValues: ' + ', '.join(words_by_category['value'])
    )
#%%
def transcribeAndExtract(audio_file, audio_mic, transcribe_only):
    """Transcribe the supplied audio and optionally extract entities from it.

    Args:
        audio_file: Uploaded audio input, or None.
        audio_mic: Microphone audio input, or None.
        transcribe_only: When truthy, skip entity extraction and return an
            empty string as the second element.

    Returns:
        A tuple `(transcription, extraction_summary)` of two strings.
    """
    transcription = transcribe(audio_file, audio_mic)

    if transcribe_only:
        return transcription, ''

    if audio_file is None and audio_mic is None:
        # With no audio, `transcription` is only a placeholder notice. Do not
        # feed that notice to the NER model; passing None makes the extractor
        # return its own "no transcription" message instead.
        return transcription, extractCallSignCommand(None)

    return transcription, extractCallSignCommand(transcription)
#%%
# Build the Gradio interface. The three inputs are handed to
# transcribeAndExtract in order (uploaded file, microphone recording,
# "transcribe only" flag) and its two return values fill the two text outputs.
iface = gr.Interface(
    fn=transcribeAndExtract,
    inputs=[
        gr.Audio(source='upload', type='filepath', interactive=True),
        gr.Audio(source='microphone', type='filepath'),
        # The initial checkbox state is set with `value`; `default` is the
        # legacy keyword for the same setting.
        gr.Checkbox(label='Transcribe only', value=False),
    ],
    outputs=[
        gr.Text(label='Transcription'),
        gr.Text(label='Callsigns, commands and values'),
    ],
    title=ttl,
    description=dis,
)
#%%
# Start the app with Gradio's default server settings.
# To listen on all network interfaces on port 9000 instead, use:
# iface.launch(server_name='0.0.0.0', server_port=9000)
iface.launch()