cdactvm committed on
Commit
b41335f
·
verified ·
1 Parent(s): d267a37

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -14
app.py CHANGED
@@ -17,22 +17,22 @@ from convert2list import convert_to_list
17
  from processDoubles import process_doubles
18
  from replaceWords import replace_words
19
 
20
- transcriber = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-hindi_v1")
21
- processor = AutoProcessor.from_pretrained("cdactvm/w2v-bert-2.0-hindi_v1")
22
 
23
- vocab_dict = processor.tokenizer.get_vocab()
24
 
25
- sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
26
- decoder = build_ctcdecoder(
27
- labels=list(sorted_vocab_dict.keys()),
28
- kenlm_model_path="lm.binary",
29
- )
30
- processor_with_lm = Wav2Vec2ProcessorWithLM(
31
- feature_extractor=processor.feature_extractor,
32
- tokenizer=processor.tokenizer,
33
- decoder=decoder
34
- )
35
- processor.feature_extractor._processor_class = "Wav2Vec2ProcessorWithLM"
36
 
37
 
38
  def transcribe(audio):
 
17
  from processDoubles import process_doubles
18
  from replaceWords import replace_words
19
 
20
+ # transcriber = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-hindi_v1")
21
+ # processor = AutoProcessor.from_pretrained("cdactvm/w2v-bert-2.0-hindi_v1")
22
 
23
+ # vocab_dict = processor.tokenizer.get_vocab()
24
 
25
+ # sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
26
+ # decoder = build_ctcdecoder(
27
+ # labels=list(sorted_vocab_dict.keys()),
28
+ # kenlm_model_path="lm.binary",
29
+ # )
30
+ # processor_with_lm = Wav2Vec2ProcessorWithLM(
31
+ # feature_extractor=processor.feature_extractor,
32
+ # tokenizer=processor.tokenizer,
33
+ # decoder=decoder
34
+ # )
35
+ # processor.feature_extractor._processor_class = "Wav2Vec2ProcessorWithLM"
36
 
37
 
38
  def transcribe(audio):