cdactvm committed on
Commit b2cbd31 · verified · 1 Parent(s): 76eef77

Update app.py

Files changed (1)
  1. app.py +25 -6
app.py CHANGED
@@ -1,6 +1,9 @@
 import warnings
 import gradio as gr
 from transformers import pipeline
+from transformers import AutoProcessor
+from pyctcdecode import build_ctcdecoder
+from transformers import Wav2Vec2ProcessorWithLM
 
 import os
 import re
@@ -12,8 +15,24 @@ p2 = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-h
 #p3 = pipeline(task="automatic-speech-recognition", model="cdactvm/kannada_w2v-bert_model")
 #p4 = pipeline(task="automatic-speech-recognition", model="cdactvm/telugu_w2v-bert_model")
 #p5 = pipeline(task="automatic-speech-recognition", model="Sajjo/w2v-bert-2.0-bangala-gpu-CV16.0_v2")
-p6 = pipeline(task="automatic-speech-recognition", model="cdactvm/hf-open-assames")
+#p6 = pipeline(task="automatic-speech-recognition", model="cdactvm/hf-open-assames")
 p7 = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-assames")
+processor = AutoProcessor.from_pretrained("cdactvm/w2v-assames")
+vocab_dict = processor.tokenizer.get_vocab()
+sorted_vocab_dict = {k.lower(): v for k, v in sorted(vocab_dict.items(), key=lambda item: item[1])}
+decoder = build_ctcdecoder(
+    labels=list(sorted_vocab_dict.keys()),
+    kenlm_model_path="lm.binary",
+)
+processor_with_lm = Wav2Vec2ProcessorWithLM(
+    feature_extractor=processor.feature_extractor,
+    tokenizer=processor.tokenizer,
+    decoder=decoder
+)
+processor.feature_extractor._processor_class = "Wav2Vec2ProcessorWithLM"
+p8 = pipeline("automatic-speech-recognition", model="cdactvm/w2v-assames", tokenizer=processor_with_lm, feature_extractor=processor_with_lm.feature_extractor, decoder=processor_with_lm.decoder)
+
+
 os.system('git clone https://github.com/irshadbhat/indic-trans.git')
 os.system('pip install ./indic-trans/.')
 
@@ -55,8 +74,8 @@ def transcribe_bangala(speech):
         return "Error: ASR returned None"
     return text
 
-def transcribe_assamese_model1(speech):
-    text = p6(speech)["text"]
+def transcribe_assamese_LM(speech):
+    text = p8(speech)["text"]
     text = cleanhtml(text)
     if text is None:
         return "Error: ASR returned None"
@@ -174,8 +193,8 @@ def sel_lng(lng, mic=None, file=None):
         return transcribe_ban_eng(audio)
     elif lng == "Bangala":
         return transcribe_bangala(audio)
-    elif lng == "Assamese-Model1":
-        return transcribe_assamese_model1(audio)
+    elif lng == "Assamese-LM":
+        return transcribe_assamese_LM(audio)
     elif lng == "Assamese-Model2":
         return transcribe_assamese_model2(audio)
 
@@ -404,7 +423,7 @@ demo=gr.Interface(
     inputs=[
 
         #gr.Dropdown(["Hindi","Hindi-trans","Odiya","Odiya-trans","Kannada","Kannada-trans","Telugu","Telugu-trans","Bangala","Bangala-trans"],value="Hindi",label="Select Language"),
-        gr.Dropdown(["Hindi","Hindi-trans","Assamese-Model1","Assamese-Model2"],value="Hindi",label="Select Language"),
+        gr.Dropdown(["Hindi","Hindi-trans","Assamese-LM","Assamese-Model2"],value="Hindi",label="Select Language"),
         gr.Audio(sources=["microphone","upload"], type="filepath"),
         #gr.Audio(sources="upload", type="filepath"),
         #"state"