Spaces:
Runtime error
Runtime error
GreenRaptor
committed on
Commit
·
2e00f3e
1
Parent(s):
37254e5
Update app.py
Browse files
app.py
CHANGED
@@ -1,5 +1,13 @@
|
|
1 |
import gradio as gr
|
2 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
# from transformers import AutoProcessor, AutoModelForPreTraining
|
5 |
|
@@ -7,10 +15,36 @@ from subprocess import Popen
|
|
7 |
|
8 |
# model = AutoModelForPreTraining.from_pretrained("patrickvonplaten/mms-1b")
|
9 |
|
10 |
-
model
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
def transcribe(audio):
|
13 |
-
|
|
|
|
|
|
|
14 |
|
15 |
gr.Interface(
|
16 |
title = 'MetaAI (Facebook Research) MMS (Massively Multilingual Speech) ASR',
|
|
|
1 |
import gradio as gr
|
2 |
+
|
3 |
+
import argparse
|
4 |
+
import soundfile as sf
|
5 |
+
import tempfile
|
6 |
+
from pathlib import Path
|
7 |
+
import os
|
8 |
+
import subprocess
|
9 |
+
import sys
|
10 |
+
import re
|
11 |
|
12 |
# from transformers import AutoProcessor, AutoModelForPreTraining
|
13 |
|
|
|
15 |
|
16 |
# model = AutoModelForPreTraining.from_pretrained("patrickvonplaten/mms-1b")
|
17 |
|
18 |
+
def process(audio, model, lang, format):
    """Transcribe audio files by running ``infer.py`` in a subprocess.

    A temporary manifest directory is populated with ``dev.tsv``, ``dev.uid``,
    ``dev.ltr`` and ``dev.wrd`` (the last two contain placeholder transcripts),
    ``infer.py`` is invoked with that directory as both ``task.data`` and
    ``decoding.results_path``, and the resulting ``hypo.word`` file is read
    back from it.

    Args:
        audio: An iterable of audio file paths. A single path (``str`` or
            ``os.PathLike``) is also accepted and treated as a one-item list.
        model: Value passed to ``infer.py`` as ``common_eval.path``.
        lang: Prefix of the ``dataset.gen_subset`` value (``"<lang>:dev"``).
        format: Value passed to ``infer.py`` as ``common_eval.post_process``.

    Returns:
        A list with one cleaned hypothesis string per line read from
        ``hypo.word``; an empty list when no audio paths were given.

    Raises:
        subprocess.CalledProcessError: If ``infer.py`` exits with a non-zero
            status.
    """
    # A lone path would otherwise be iterated character by character.
    if isinstance(audio, (str, os.PathLike)):
        audio = [audio]
    audio_paths = list(audio)
    if not audio_paths:
        # Nothing to transcribe; skip the expensive model launch entirely.
        return []

    hypotheses = []
    with tempfile.TemporaryDirectory() as tmpdir:
        print(">>> preparing tmp manifest dir ...", file=sys.stderr)
        tmpdir = Path(tmpdir)
        with open(tmpdir / "dev.tsv", "w") as fw:
            fw.write("/\n")
            for path in audio_paths:
                # Close the sound file as soon as its frame count is known.
                with sf.SoundFile(path) as snd:
                    nsample = snd.frames
                fw.write(f"{path}\t{nsample}\n")
        with open(tmpdir / "dev.uid", "w") as fw:
            fw.writelines(f"{path}\n" for path in audio_paths)
        with open(tmpdir / "dev.ltr", "w") as fw:
            fw.write("d u m m y | d u m m y\n" * len(audio_paths))
        with open(tmpdir / "dev.wrd", "w") as fw:
            fw.write("dummy dummy\n" * len(audio_paths))

        # Argument list instead of a shell string: no quoting pitfalls if a
        # path or value contains spaces or shell metacharacters.
        cmd = [
            "python",
            "infer.py",
            "-m",
            "decoding.type=viterbi",
            "dataset.max_tokens=4000000",
            "distributed_training.distributed_world_size=1",
            f"common_eval.path='{model}'",
            f"task.data={tmpdir}",
            f"dataset.gen_subset={lang}:dev",
            f"common_eval.post_process={format}",
            f"decoding.results_path={tmpdir}",
        ]
        env = {
            **os.environ,
            "PYTHONPATH": ".",
            "PREFIX": "INFER",
            "HYDRA_FULL_ERROR": "1",
        }
        print(">>> loading model & running inference ...", file=sys.stderr)
        # check=True surfaces an inference failure directly instead of a
        # later, less informative error about a missing hypo.word file.
        subprocess.run(cmd, env=env, stdout=subprocess.DEVNULL, check=True)

        with open(tmpdir / "hypo.word") as fr:
            for path, hypo in zip(audio_paths, fr):
                # Drop the trailing parenthesised token from the line.
                hypo = re.sub(r"\(\S+\)$", "", hypo).strip()
                print(f'===============\nInput: {path}\nOutput: {hypo}')
                hypotheses.append(hypo)
    return hypotheses
|
42 |
|
43 |
def transcribe(audio):
    """Run ``process`` on *audio* with the fixed model, language and format.

    Args:
        audio: Forwarded unchanged to ``process`` as its ``audio`` argument.

    Returns:
        Whatever ``process`` returns, so the caller can use the result
        instead of it being discarded.
    """
    model = "base_300m.pt"
    lang = "eng"
    # Named to avoid shadowing the ``format`` builtin inside this function.
    post_process = "letter"
    return process(audio, model, lang, post_process)
|
48 |
|
49 |
gr.Interface(
|
50 |
title = 'MetaAI (Facebook Research) MMS (Massively Multilingual Speech) ASR',
|