import nemo.collections.asr as nemo_asr
import gradio as gr
import pandas as pd
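
# Load the pretrained Kinyarwanda ("rw") Conformer-CTC large ASR model via NeMo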
asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(model_name="stt_rw_conformer_ctc_large")
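
# Build a lowercase word-level mapping from the Amasaku TSV (column 0 -> column 1)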
df = pd.read_csv("amasaku_data.tsv", sep="\t")
print(df)
amasaku_mapping = {str(key).lower(): str(val).lower() for key, val in zip(df.iloc[:, 0], df.iloc[:, 1])}

def transcribe(file):
    # if not audio:
    #     return {state_var: state, transcription_var: state}
    print("filename: ", file)
    # Transcribe the uploaded audio file; the model returns a list with one transcript
    transcription = asr_model.transcribe([file])
    transcription = transcription[0].lower().split()
    # Replace each recognized word with its Amasaku equivalent, if one exists
    transcribed_with_amasaku = []
    for word in transcription:
        transcribed_with_amasaku.append(amasaku_mapping.get(word, word))
    transcribed_with_amasaku = " ".join(transcribed_with_amasaku)
    return transcribed_with_amasaku.capitalize()
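
# Minimal Gradio UI: audio upload in the left column, transcription output in the
# right column, and a button that runs transcribe() on the uploaded file.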
with gr.Blocks() as demo:
    # state_var = gr.State("")
    with gr.Row():
        with gr.Column():
            uploaded_audio = gr.Audio(label="Upload Audio File", type="filepath")
        with gr.Column():
            transcription = gr.Textbox(type="text", label="Transcription")
    with gr.Row():
        transcribe_button = gr.Button("Transcribe")
    transcribe_button.click(
        transcribe,
        [uploaded_audio],
        transcription,
    )

demo.launch()