Spaces:
Runtime error
Runtime error
File size: 2,194 Bytes
37254e5 2e00f3e 37254e5 75cb74f 3ece66a 75cb74f 3ece66a 75cb74f 2e00f3e 3ece66a edf267e 2e00f3e 38e621e edf267e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
import gradio as gr
import argparse
import soundfile as sf
import tempfile
from pathlib import Path
import os
import subprocess
import sys
import re
# from transformers import AutoProcessor, AutoModelForPreTraining
# processor = AutoProcessor.from_pretrained("patrickvonplaten/mms-1b")
# model = AutoModelForPreTraining.from_pretrained("patrickvonplaten/mms-1b")
def process(audio, model, lang, format):
    """Transcribe audio files by running the external ``infer.py`` script.

    A temporary manifest directory (``dev.tsv``, ``dev.uid``, ``dev.ltr``,
    ``dev.wrd``) is written, ``python infer.py`` is run against it, and the
    ``hypo.word`` file it leaves in that directory is read back.  Every
    hypothesis is printed to stdout together with its input path and is
    also returned to the caller.

    Args:
        audio: Path of one audio file, or an iterable of such paths.
        model: Checkpoint path, passed as ``common_eval.path``.
        lang: Language code, passed as ``dataset.gen_subset=<lang>:dev``.
        format: Passed as ``common_eval.post_process``.  (The name shadows
            the builtin but is kept because it is part of the signature.)

    Returns:
        A list with one cleaned hypothesis string per transcribed input,
        in the order the lines appear in ``hypo.word``.  Empty when no
        audio was given.

    Raises:
        FileNotFoundError: If the inference run did not produce
            ``hypo.word``.
    """
    # A lone path would otherwise be iterated character by character.
    if isinstance(audio, (str, os.PathLike)):
        audio = [audio]
    paths = [str(path) for path in audio]
    if not paths:
        return []

    with tempfile.TemporaryDirectory() as tmpdir:
        print(">>> preparing tmp manifest dir ...", file=sys.stderr)
        tmpdir = Path(tmpdir)

        with open(tmpdir / "dev.tsv", "w") as fw:
            fw.write("/\n")
            for path in paths:
                # Close the sound file as soon as the frame count is known.
                with sf.SoundFile(path) as sound:
                    nsample = sound.frames
                fw.write(f"{path}\t{nsample}\n")
        # The companion files need exactly one line per utterance.
        # NOTE(review): the id written per line is the utterance's own path;
        # confirm that infer.py does not expect a different id scheme.
        with open(tmpdir / "dev.uid", "w") as fw:
            fw.writelines(f"{path}\n" for path in paths)
        with open(tmpdir / "dev.ltr", "w") as fw:
            fw.write("d u m m y | d u m m y\n" * len(paths))
        with open(tmpdir / "dev.wrd", "w") as fw:
            fw.write("dummy dummy\n" * len(paths))

        # Argument list instead of a shell string, so a temp dir or model
        # path containing whitespace or shell metacharacters stays intact.
        cmd = [
            "python",
            "infer.py",
            "-m",
            "decoding.type=viterbi",
            "dataset.max_tokens=4000000",
            "distributed_training.distributed_world_size=1",
            f"common_eval.path='{model}'",
            f"task.data={tmpdir}",
            f"dataset.gen_subset={lang}:dev",
            f"common_eval.post_process={format}",
            f"decoding.results_path={tmpdir}",
        ]
        env = {
            **os.environ,
            "PYTHONPATH": ".",
            "PREFIX": "INFER",
            "HYDRA_FULL_ERROR": "1",
        }
        print(">>> loading model & running inference ...", file=sys.stderr)
        subprocess.run(cmd, env=env, stdout=subprocess.DEVNULL)

        hypotheses = []
        with open(tmpdir / "hypo.word") as fr:
            for path, line in zip(paths, fr):
                # Drop the trailing parenthesised tag at the end of the line.
                hypo = re.sub(r"\(\S+\)$", "", line).strip()
                print(f'===============\nInput: {path}\nOutput: {hypo}')
                hypotheses.append(hypo)
    return hypotheses
def transcribe(audio):
    """Gradio callback: transcribe a recording and return the text.

    Args:
        audio: Filesystem path of the recorded clip (or a sequence of such
            paths); ``None`` or empty when there is nothing to transcribe.

    Returns:
        The transcription, one hypothesis per line, or ``""`` when there
        is no audio or no hypothesis was returned.
    """
    if audio is None or len(audio) == 0:
        return ""

    model = "base_300m.pt"
    lang = "eng"
    post_process = "letter"

    # process() iterates over its first argument, so wrap a single path.
    paths = [audio] if isinstance(audio, str) else list(audio)
    hypotheses = process(paths, model, lang, post_process)
    # Tolerate a process() that only prints and returns None.
    return "\n".join(hypotheses or [])
# Build and start the web UI: a microphone recording is passed to
# transcribe() as a file path and the result is shown in a textbox.
# NOTE(review): `gr.inputs.Audio` is the legacy component namespace; confirm
# it still exists in the Gradio version this app is pinned to.
gr.Interface(
    title='MetaAI (Facebook Research) MMS (Massively Multilingual Speech) ASR',
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath"),
    ],
    outputs=[
        "textbox",
    ],
    live=True,
).launch()