# NOTE: removed non-Python extraction residue (file-size header, git-blame
# hash gutter, and line-number gutter) that preceded the actual source.
import nemo.collections.asr as nemo_asr
import gradio as gr
import pandas as pd
from pydub import AudioSegment
import os
# Load the pre-trained Kinyarwanda ("rw") Conformer-CTC speech-to-text model.
# NOTE(review): from_pretrained downloads the checkpoint on first run —
# this happens at import time, before the UI is built.
asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(model_name="stt_rw_conformer_ctc_large")
# Tab-separated lookup table; column 0 holds source words, column 1 the
# preferred "amasaku" replacement spelling used to post-process transcripts.
df = pd.read_csv("amasaku_data.tsv",sep='\t')
# Case-insensitive word -> replacement mapping built from the first two columns.
amasaku_mapping = {str(key).lower():str(val).lower() for key,val in zip(df.iloc[:,0],df.iloc[:,1])}
def transcribe(file):
    """Transcribe an audio file and apply the amasaku word mapping.

    Parameters
    ----------
    file : str or None
        Filesystem path to the recorded/uploaded audio, as produced by a
        Gradio ``Audio`` component with ``type="filepath"``.

    Returns
    -------
    str
        The capitalized, word-mapped transcription, or an error message
        when no file was supplied.
    """
    # Guard clause: the original assigned file_name only when `file` was
    # truthy, so the "no file" branch raised NameError instead of returning.
    if not file:
        return "No audio file provided."

    # The ASR model expects 16 kHz mono input; normalize the audio first.
    # os.path.splitext is safe for paths that contain extra dots.
    wav_path = os.path.splitext(file)[0] + ".wav"
    try:
        audio = AudioSegment.from_file(file)
        if audio.frame_rate != 16000:
            audio = audio.set_frame_rate(16000)
        if audio.channels != 1:
            audio = audio.set_channels(1)
        audio.export(wav_path, format="wav")
    except Exception as e:
        # Best-effort: if conversion fails (unreadable container, etc.),
        # log and fall back to handing the original file to the model.
        print(e)
        wav_path = file

    # BUG FIX: transcribe the converted 16 kHz mono file — the original
    # passed the raw upload, silently discarding the conversion above.
    transcription = asr_model.transcribe([wav_path])
    words = transcription[0].lower().split()
    # Substitute each word with its amasaku equivalent when one exists.
    mapped = [amasaku_mapping.get(word, word) for word in words]
    return " ".join(mapped).capitalize()
# Build the Gradio UI: two audio sources (microphone, upload) on the left,
# the transcription textbox on the right, and a single Transcribe button.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            microphone = gr.Audio(sources="microphone", type="filepath", label="Microphone")
            uploaded_audio = gr.Audio(label="Upload Audio File", type="filepath", sources="upload")
        with gr.Column():
            transcription = gr.Textbox(type="text", label="Transcription")
    with gr.Row():
        transcribe_button = gr.Button("Transcribe")

    def handle_audio(microphone, uploaded_audio):
        """Route whichever audio source was provided to transcribe().

        The microphone recording takes precedence over an uploaded file,
        matching the original dispatch order.
        """
        if microphone:
            return transcribe(microphone)
        if uploaded_audio:
            return transcribe(uploaded_audio)
        return "No audio file provided."

    transcribe_button.click(
        handle_audio,
        [microphone, uploaded_audio],
        transcription,
    )

demo.launch()