Kleber's picture
Update app.py
1857e8c verified
raw
history blame
2.47 kB
import nemo.collections.asr as nemo_asr
import gradio as gr
import pandas as pd
from pydub import AudioSegment
import os
# Pretrained NeMo CTC-BPE speech-to-text model, loaded once when the module is imported.
asr_model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(
    model_name="stt_rw_conformer_ctc_large"
)

# Tab-separated lookup table; only its first two columns are read below.
df = pd.read_csv("amasaku_data.tsv", sep="\t")

# Lower-cased first-column entry -> lower-cased second-column entry.
# Presumably plain spelling -> spelling with amasaku marks; verify against the data file.
amasaku_mapping = {}
for source_word, target_word in zip(df.iloc[:, 0], df.iloc[:, 1]):
    amasaku_mapping[str(source_word).lower()] = str(target_word).lower()
def transcribe(file):
    """Transcribe an audio file and apply the amasaku word substitutions.

    The audio is re-encoded as 16 kHz mono WAV next to the input file and
    that converted file is what is handed to the ASR model. If the
    conversion fails, the error is printed and the original file is
    transcribed unchanged (best effort).

    Args:
        file: Path to the audio file as a string, or a falsy value when no
            audio was supplied.

    Returns:
        The transcription with every word replaced by its
        ``amasaku_mapping`` entry when one exists, joined with single
        spaces and passed through ``str.capitalize``; or the message
        "No audio file provided." when ``file`` is falsy.
    """
    # Guard first so a missing input never reaches the audio/model code.
    if not file:
        return "No audio file provided."

    # Fall back to the original file if the conversion below fails.
    audio_path = file
    try:
        audio = AudioSegment.from_file(file)
        if audio.frame_rate != 16000:
            audio = audio.set_frame_rate(16000)
        if audio.channels != 1:
            audio = audio.set_channels(1)
        # splitext only strips the final extension, so dots elsewhere in the
        # path (e.g. in a directory name) do not truncate it.
        wav_path = os.path.splitext(file)[0] + ".wav"
        audio.export(wav_path, format="wav")
        audio_path = wav_path
    except Exception as e:
        # Deliberate best effort: report the problem and try the raw file.
        print(e)

    results = asr_model.transcribe([audio_path])
    first = results[0]
    # NOTE(review): some NeMo releases appear to return result objects with a
    # ``.text`` attribute instead of plain strings — confirm against the
    # installed version. Plain strings pass through unchanged.
    text = getattr(first, "text", first)

    words = text.lower().split()
    transcribed_with_amasaku = " ".join(
        amasaku_mapping.get(word, word) for word in words
    )
    return transcribed_with_amasaku.capitalize()
def handle_audio(microphone, uploaded_audio):
    """Transcribe whichever audio input was provided.

    The microphone recording takes precedence over the uploaded file; when
    neither is set, a fixed message is returned instead.
    """
    chosen_path = microphone or uploaded_audio
    if not chosen_path:
        return "No audio file provided."
    return transcribe(chosen_path)


# Page layout: the two audio inputs on the left, the transcription text box
# on the right, and the trigger button in its own row underneath.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            microphone = gr.Audio(
                label="Microphone",
                sources="microphone",
                type="filepath",
            )
            uploaded_audio = gr.Audio(
                label="Upload Audio File",
                sources="upload",
                type="filepath",
            )
            # Debug output: prints the component object while the UI is built.
            print("upload path: ", uploaded_audio)
        with gr.Column():
            transcription = gr.Textbox(label="Transcription", type="text")
    with gr.Row():
        transcribe_button = gr.Button("Transcribe")

    # Clicking the button sends both audio paths to handle_audio and writes
    # its return value into the transcription box.
    transcribe_button.click(
        fn=handle_audio,
        inputs=[microphone, uploaded_audio],
        outputs=transcription,
    )

demo.launch()