GreenRaptor committed on
Commit
2e00f3e
·
1 Parent(s): 37254e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -3
app.py CHANGED
@@ -1,5 +1,13 @@
1
  import gradio as gr
2
- from subprocess import Popen
 
 
 
 
 
 
 
 
3
 
4
  # from transformers import AutoProcessor, AutoModelForPreTraining
5
 
@@ -7,10 +15,36 @@ from subprocess import Popen
7
 
8
  # model = AutoModelForPreTraining.from_pretrained("patrickvonplaten/mms-1b")
9
 
10
- model = "base_300m.pt"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  def transcribe(audio):
13
- Popen('python mms_infer.py --model model --lang eng --audio audio')
 
 
 
14
 
15
  gr.Interface(
16
  title = 'MetaAI (Facebook Research) MMS (Massively Multilingual Speech) ASR',
 
1
  import gradio as gr
2
+
3
+ import argparse
4
+ import soundfile as sf
5
+ import tempfile
6
+ from pathlib import Path
7
+ import os
8
+ import subprocess
9
+ import sys
10
+ import re
11
 
12
  # from transformers import AutoProcessor, AutoModelForPreTraining
13
 
 
15
 
16
  # model = AutoModelForPreTraining.from_pretrained("patrickvonplaten/mms-1b")
17
 
18
# Strips a trailing parenthesised, whitespace-free token from a hypothesis line.
_HYPO_ID_SUFFIX = re.compile(r"\(\S+\)$")


def _as_audio_list(audio):
    """Return *audio* as a list: None -> [], a single path -> [path]."""
    if audio is None:
        return []
    if isinstance(audio, (str, bytes, os.PathLike)):
        # A lone path must not be iterated character by character.
        return [audio]
    return list(audio)


def _write_manifest(manifest_dir, audio_paths):
    """Write the dev.tsv / dev.uid / dev.ltr / dev.wrd files into *manifest_dir*."""
    with open(manifest_dir / "dev.tsv", "w", encoding="utf-8") as fw:
        # NOTE(review): the leading "/" line is presumably the manifest root
        # that the listed paths are resolved against -- confirm with infer.py.
        fw.write("/\n")
        for path in audio_paths:
            # Close the sound file as soon as the frame count has been read.
            with sf.SoundFile(path) as snd:
                nsample = snd.frames
            fw.write(f"{path}\t{nsample}\n")

    count = len(audio_paths)
    with open(manifest_dir / "dev.uid", "w", encoding="utf-8") as fw:
        fw.writelines(f"{path}\n" for path in audio_paths)
    # Placeholder reference transcripts, one line per input file.
    with open(manifest_dir / "dev.ltr", "w", encoding="utf-8") as fw:
        fw.write("d u m m y | d u m m y\n" * count)
    with open(manifest_dir / "dev.wrd", "w", encoding="utf-8") as fw:
        fw.write("dummy dummy\n" * count)


def _infer_command(model, lang, format, work_dir):
    """Build the argv list that launches infer.py (no shell involved)."""
    return [
        "python",
        "infer.py",
        "-m",
        "decoding.type=viterbi",
        "dataset.max_tokens=4000000",
        "distributed_training.distributed_world_size=1",
        # The inner single quotes are part of the override value itself.
        f"common_eval.path='{model}'",
        f"task.data={work_dir}",
        f"dataset.gen_subset={lang}:dev",
        f"common_eval.post_process={format}",
        f"decoding.results_path={work_dir}",
    ]


def process(audio, model, lang, format):
    """Run infer.py on the given audio file(s) and return the hypotheses.

    Args:
        audio: A single audio path or an iterable of audio paths. ``None``
            or an empty iterable yields an empty result without running
            inference.
        model: Value interpolated into the ``common_eval.path`` override.
        lang: Value interpolated into ``dataset.gen_subset`` as ``{lang}:dev``.
        format: Value passed as ``common_eval.post_process``.

    Returns:
        A list with one cleaned hypothesis string per line of ``hypo.word``.

    Raises:
        subprocess.CalledProcessError: If infer.py exits with a non-zero
            status.
    """
    audio_paths = _as_audio_list(audio)
    if not audio_paths:
        return []

    with tempfile.TemporaryDirectory() as tmpdir:
        print(">>> preparing tmp manifest dir ...", file=sys.stderr)
        work_dir = Path(tmpdir)
        _write_manifest(work_dir, audio_paths)

        # Same variables the original shell prefix set, layered over the
        # current environment.
        env = dict(os.environ, PYTHONPATH=".", PREFIX="INFER", HYDRA_FULL_ERROR="1")
        print(">>> loading model & running inference ...", file=sys.stderr)
        # check=True reports a failed run directly instead of a later
        # FileNotFoundError on hypo.word.
        subprocess.run(
            _infer_command(model, lang, format, work_dir),
            env=env,
            stdout=subprocess.DEVNULL,
            check=True,
        )

        transcripts = []
        with open(work_dir / "hypo.word", encoding="utf-8") as fr:
            for ii, line in enumerate(fr):
                hypo = _HYPO_ID_SUFFIX.sub("", line).strip()
                print(f'===============\nInput: {audio_paths[ii]}\nOutput: {hypo}')
                transcripts.append(hypo)
    return transcripts


def transcribe(audio):
    """Transcribe *audio* with the fixed model/language settings below.

    Returns the hypotheses joined by newlines ("" when there is no audio).
    NOTE(review): assumes the caller supplies an audio file path (or None);
    confirm against the gr.Interface input configuration.
    """
    model = "base_300m.pt"
    lang = "eng"
    post_process = "letter"
    return "\n".join(process(audio, model, lang, post_process))
48
 
49
  gr.Interface(
50
  title = 'MetaAI (Facebook Research) MMS (Massively Multilingual Speech) ASR',