vineelpratap
committed on
Commit
•
5442f52
1
Parent(s):
a45002a
Update asr.py
Browse files
asr.py
CHANGED
@@ -20,50 +20,52 @@ processor = AutoProcessor.from_pretrained(MODEL_ID)
|
|
20 |
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
|
21 |
|
22 |
|
23 |
-
lm_decoding_config = {}
|
24 |
-
lm_decoding_configfile = hf_hub_download(
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
)
|
29 |
-
|
30 |
-
with open(lm_decoding_configfile) as f:
|
31 |
-
|
32 |
-
|
33 |
-
# allow language model decoding for "eng"
|
34 |
-
|
35 |
-
decoding_config = lm_decoding_config["eng"]
|
36 |
-
|
37 |
-
lm_file = hf_hub_download(
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
)
|
42 |
-
token_file = hf_hub_download(
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
)
|
47 |
-
lexicon_file = None
|
48 |
-
if decoding_config["lexiconfile"] is not None:
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
|
55 |
-
beam_search_decoder = ctc_decoder(
|
56 |
-
lexicon=lexicon_file,
|
57 |
-
tokens=token_file,
|
58 |
-
lm=lm_file,
|
59 |
-
nbest=1,
|
60 |
-
beam_size=500,
|
61 |
-
beam_size_token=50,
|
62 |
-
lm_weight=float(decoding_config["lmweight"]),
|
63 |
-
word_score=float(decoding_config["wordscore"]),
|
64 |
-
sil_score=float(decoding_config["silweight"]),
|
65 |
-
blank_token="<s>",
|
66 |
-
)
|
67 |
|
68 |
def transcribe(
|
69 |
audio_source=None, microphone=None, file_upload=None, lang="eng (English)"
|
|
|
20 |
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
|
21 |
|
22 |
|
23 |
+
# lm_decoding_config = {}
|
24 |
+
# lm_decoding_configfile = hf_hub_download(
|
25 |
+
# repo_id="facebook/mms-cclms",
|
26 |
+
# filename="decoding_config.json",
|
27 |
+
# subfolder="mms-1b-all",
|
28 |
+
# )
|
29 |
+
|
30 |
+
# with open(lm_decoding_configfile) as f:
|
31 |
+
# lm_decoding_config = json.loads(f.read())
|
32 |
+
|
33 |
+
# # allow language model decoding for "eng"
|
34 |
+
|
35 |
+
# decoding_config = lm_decoding_config["eng"]
|
36 |
+
|
37 |
+
# lm_file = hf_hub_download(
|
38 |
+
# repo_id="facebook/mms-cclms",
|
39 |
+
# filename=decoding_config["lmfile"].rsplit("/", 1)[1],
|
40 |
+
# subfolder=decoding_config["lmfile"].rsplit("/", 1)[0],
|
41 |
+
# )
|
42 |
+
# token_file = hf_hub_download(
|
43 |
+
# repo_id="facebook/mms-cclms",
|
44 |
+
# filename=decoding_config["tokensfile"].rsplit("/", 1)[1],
|
45 |
+
# subfolder=decoding_config["tokensfile"].rsplit("/", 1)[0],
|
46 |
+
# )
|
47 |
+
# lexicon_file = None
|
48 |
+
# if decoding_config["lexiconfile"] is not None:
|
49 |
+
# lexicon_file = hf_hub_download(
|
50 |
+
# repo_id="facebook/mms-cclms",
|
51 |
+
# filename=decoding_config["lexiconfile"].rsplit("/", 1)[1],
|
52 |
+
# subfolder=decoding_config["lexiconfile"].rsplit("/", 1)[0],
|
53 |
+
# )
|
54 |
+
|
55 |
+
beam_search_decoder = None
|
56 |
+
# beam_search_decoder = ctc_decoder(
|
57 |
+
# lexicon=lexicon_file,
|
58 |
+
# tokens=token_file,
|
59 |
+
# lm=lm_file,
|
60 |
+
# nbest=1,
|
61 |
+
# beam_size=500,
|
62 |
+
# beam_size_token=50,
|
63 |
+
# lm_weight=float(decoding_config["lmweight"]),
|
64 |
+
# word_score=float(decoding_config["wordscore"]),
|
65 |
+
# sil_score=float(decoding_config["silweight"]),
|
66 |
+
# blank_token="<s>",
|
67 |
+
# )
|
68 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
def transcribe(
|
71 |
audio_source=None, microphone=None, file_upload=None, lang="eng (English)"
|