File size: 1,744 Bytes
4a8a770 4834725 4a8a770 21d3cbb 4a8a770 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 |
import nemo.collections.asr as nemo_asr
import gradio as gr
# Load the pretrained hybrid RNNT/CTC checkpoint once at import time so every
# request reuses the same model instance; weights are mapped onto the CPU.
asr_model = nemo_asr.models.EncDecHybridRNNTCTCBPEModel.from_pretrained(
    "theodotus/stt_ua_fastconformer_hybrid_large_pc",
    map_location="cpu",
)
def process_file(in_filename: str) -> str:
    """Transcribe one audio file with the module-level ``asr_model``.

    Args:
        in_filename: Path to the audio file to transcribe. ``None`` or an
            empty string is treated as "no file supplied".

    Returns:
        The recognized text, or the literal string ``"Error: No file"`` when
        no file path was given.
    """
    # Guard clause: nothing to transcribe, so report it instead of calling
    # the model with an unusable path.
    if not in_filename:
        return "Error: No file"

    # The model is handed a one-element batch; ``[0][0]`` picks the first
    # entry of the first returned sequence.
    # NOTE(review): presumably the best hypothesis for the only input file --
    # confirm against the return shape of the installed NeMo version.
    transcript = asr_model.transcribe(paths2audio_files=[in_filename])[0][0]
    return transcript
# Two-tab UI: one tab transcribes an uploaded file, the other a microphone
# recording. Both tabs feed the resulting file path to ``process_file``.
demo = gr.Blocks()

with demo:
    with gr.Tabs():
        with gr.TabItem("Upload from disk"):
            # ``optional=False`` was dropped: it is a deprecated keyword that
            # has no effect on Blocks components.
            uploaded_file = gr.Audio(
                source="upload",  # Choose between "microphone", "upload"
                type="filepath",
                label="Upload from disk",
            )
            upload_button = gr.Button("Submit for recognition")
            uploaded_output = gr.Textbox(
                label="Recognized speech from uploaded file"
            )

        with gr.TabItem("Record from microphone"):
            microphone = gr.Audio(
                source="microphone",  # Choose between "microphone", "upload"
                type="filepath",
                label="Record from microphone",
            )
            record_button = gr.Button("Submit for recognition")
            recorded_output = gr.Textbox(
                label="Recognized speech from recordings"
            )

    # Event wiring stays inside the ``with demo:`` context so the listeners
    # are registered on this Blocks instance.
    upload_button.click(
        process_file,
        inputs=[
            uploaded_file,
        ],
        outputs=[uploaded_output],
    )
    record_button.click(
        process_file,
        inputs=[
            microphone,
        ],
        outputs=[recorded_output],
    )
if __name__ == "__main__":
    # Start the Gradio server only when run as a script, not on import.
    demo.launch()