# NOTE: the original extraction included file-viewer metadata here (file size,
# git blame hashes, and a line-number gutter); it was not part of the source.
import os

import gradio as gr
import nemo.collections.asr as nemo_asr
import pandas as pd
from pydub import AudioSegment
# Load the pretrained Kinyarwanda Conformer-CTC speech-to-text model
# (downloads from NGC/HuggingFace on first run).
asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(model_name="stt_rw_conformer_ctc_large")
# Word-substitution table: column 0 is the transcribed form, column 1 the
# replacement ("amasaku") form — TODO confirm column semantics against the data file.
df = pd.read_csv("amasaku_data.tsv",sep='\t')
# Case-insensitive lookup built from the first two columns of the TSV.
amasaku_mapping = {str(key).lower():str(val).lower() for key,val in zip(df.iloc[:,0],df.iloc[:,1])}
def transcribe(microphone, file):
    """Transcribe an audio input and apply the amasaku word substitutions.

    Args:
        microphone: Filepath of a microphone recording; takes priority
            over ``file`` when both are provided.
        file: Filepath of an uploaded audio file.

    Returns:
        The transcription with per-word amasaku replacements applied,
        first letter capitalized. Empty string when no audio was given.
    """
    print("microphone: ", microphone)
    print("uploaded file: ", file)
    if microphone:
        file = microphone
    if not file:
        # Neither a recording nor an upload was provided.
        return ""
    try:
        # Normalize to 16 kHz mono WAV before feeding the ASR model.
        audio = AudioSegment.from_file(file)
        if audio.frame_rate != 16000:
            audio = audio.set_frame_rate(16000)
        if audio.channels != 1:
            audio = audio.set_channels(1)
        # os.path.splitext only strips the final extension, unlike the
        # previous file.split(".")[0], which truncated any path containing
        # a dot in a directory or base name.
        file = os.path.splitext(file)[0] + ".wav"
        audio.export(file, format="wav")
    except Exception as e:
        # Best effort: on conversion failure, fall back to transcribing
        # the original file path as-is.
        print(e)
    words = asr_model.transcribe([file])[0].lower().split()
    # Substitute each word with its amasaku equivalent when one exists.
    transcribed_with_amasaku = " ".join(amasaku_mapping.get(w, w) for w in words)
    return transcribed_with_amasaku.capitalize()
# Build the Gradio UI: audio inputs (mic or upload) on the left, the
# transcription output on the right, and a single Transcribe button.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            microphone = gr.Audio(sources="microphone", type="filepath", label="Microphone")
            uploaded_audio = gr.Audio(label="Upload Audio File", type="filepath", sources="upload")
        with gr.Column():
            transcription = gr.Textbox(type="text", label="Transcription")
    with gr.Row():
        transcribe_button = gr.Button("Transcribe")
    # Both audio components are passed; transcribe() prefers the microphone.
    transcribe_button.click(
        transcribe,
        [microphone, uploaded_audio],
        transcription,
    )

demo.launch()