cdactvm committed on
Commit
9a29707
·
verified ·
1 Parent(s): 4c902e5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +181 -28
app.py CHANGED
@@ -1,24 +1,36 @@
 
 
 
1
  import gradio as gr
2
  from transformers import pipeline
 
3
  import os
4
  import re
 
 
 
 
 
 
5
  os.system('git clone https://github.com/irshadbhat/indic-trans.git')
6
  os.system('pip install ./indic-trans/.')
7
- p1= pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-odia_v1")
8
- p2=pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-hindi_v1")
 
9
 
10
  def transcribe_odiya(speech):
11
- #print (p1(speech))
12
  text = p1(speech)["text"]
13
- #text=cleanhtml(text)
 
14
  return text
 
15
  def transcribe_hindi(speech):
16
- #print (p1(speech))
17
  text = p2(speech)["text"]
18
- #text=cleanhtml(text)
 
19
  return text
 
20
  def transcribe_odiya_eng(speech):
21
- from indictrans import Transliterator
22
  trn = Transliterator(source='ori', target='eng', build_lookup=True)
23
  text = p1(speech)["text"]
24
  if text is None:
@@ -31,7 +43,6 @@ def transcribe_odiya_eng(speech):
31
  return process_transcription(processed_sentence)
32
 
33
  def transcribe_hin_eng(speech):
34
- from indictrans import Transliterator
35
  trn = Transliterator(source='hin', target='eng', build_lookup=True)
36
  text = p2(speech)["text"]
37
  if text is None:
@@ -42,24 +53,6 @@ def transcribe_hin_eng(speech):
42
  replaced_words = replace_words(sentence)
43
  processed_sentence = process_doubles(replaced_words)
44
  return process_transcription(processed_sentence)
45
-
46
- def sel_lng(lng,mic=None, file=None):
47
- if mic is not None:
48
- audio = mic
49
- elif file is not None:
50
- audio = file
51
- else:
52
- return "You must either provide a mic recording or a file"
53
- if (lng=="Odiya"):
54
- return transcribe_odiya(audio)
55
- elif (lng=="Odiya-trans"):
56
- return transcribe_odiya_eng(audio)
57
- elif (lng=="Hindi-trans"):
58
- return transcribe_hin_eng(audio)
59
- elif (lng=="Hindi"):
60
- return transcribe_hindi(audio)
61
-
62
- #####################################################
63
 
64
  def process_transcription(input_sentence):
65
  word_to_code_map = {}
@@ -77,6 +70,24 @@ def process_transcription(input_sentence):
77
  text = transcript_to_sentence(numbers, code_to_word_map)
78
  return text
79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  def replace_words(sentence):
81
  replacements = [
82
  (r'\bjiro\b', 'zero'), (r'\bjero\b', 'zero'), (r'\bnn\b', 'one'),
@@ -90,6 +101,7 @@ def replace_words(sentence):
90
  sentence = re.sub(pattern, replacement, sentence)
91
  return sentence
92
 
 
93
  def process_doubles(sentence):
94
  tokens = sentence.split()
95
  result = []
@@ -108,6 +120,7 @@ def process_doubles(sentence):
108
  i += 1
109
  return ' '.join(result)
110
 
 
111
  def soundex(word):
112
  word = word.upper()
113
  word = ''.join(filter(str.isalpha, word))
@@ -126,6 +139,7 @@ def soundex(word):
126
  soundex_code = soundex_code.replace('0', '') + '000'
127
  return soundex_code[:4]
128
 
 
129
  def is_number(x):
130
  if type(x) == str:
131
  x = x.replace(',', '')
@@ -136,8 +150,91 @@ def is_number(x):
136
  return True
137
 
138
  def text2int(textnum, numwords={}):
139
- if not textnum:
140
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  # Convert sentence to transcript using Soundex
143
  def sentence_to_transcript(sentence, word_to_code_map):
@@ -151,6 +248,62 @@ def sentence_to_transcript(sentence, word_to_code_map):
151
 
152
  transcript = ' '.join(transcript_codes)
153
  return transcript
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
  ######################################################
156
  demo=gr.Interface(
 
import warnings
warnings.filterwarnings("ignore")

import importlib
import os
import re

import gradio as gr
import torchaudio
from transformers import pipeline

# indictrans is installed from a git checkout at start-up, so it can only be
# imported AFTER that install has run.  Importing it at the top of the file
# (as before) raises ImportError on a machine where it is not yet installed.
try:
    from indictrans import Transliterator
except ImportError:
    os.system('git clone https://github.com/irshadbhat/indic-trans.git')
    os.system('pip install ./indic-trans/.')
    # Let the import system notice the package that was just installed.
    importlib.invalidate_caches()
    from indictrans import Transliterator

# Speech-recognition pipelines: p1 handles Odia, p2 handles Hindi.
p1 = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-odia_v1")
p2 = pipeline(task="automatic-speech-recognition", model="cdactvm/w2v-bert-2.0-hindi_v1")

# `pipe` used to be a second, separately loaded copy of the Odia model; it is
# kept as an alias of p1 so the name still exists without loading it twice.
pipe = p1

# Module-level Odia -> English transliterator.
trn = Transliterator(source='ori', target='eng', build_lookup=True)
20
 
def transcribe_odiya(speech):
    """Run the Odia ASR pipeline (``p1``) on *speech* and return its text.

    Returns the fixed message "Error: ASR returned None" when the pipeline
    result carries no text.
    """
    recognized = p1(speech)["text"]
    return "Error: ASR returned None" if recognized is None else recognized
26
+
def transcribe_hindi(speech):
    """Run the Hindi ASR pipeline (``p2``) on *speech* and return its text.

    Returns the fixed message "Error: ASR returned None" when the pipeline
    result carries no text.
    """
    recognized = p2(speech)["text"]
    return "Error: ASR returned None" if recognized is None else recognized
32
+
33
  def transcribe_odiya_eng(speech):
 
34
  trn = Transliterator(source='ori', target='eng', build_lookup=True)
35
  text = p1(speech)["text"]
36
  if text is None:
 
43
  return process_transcription(processed_sentence)
44
 
45
  def transcribe_hin_eng(speech):
 
46
  trn = Transliterator(source='hin', target='eng', build_lookup=True)
47
  text = p2(speech)["text"]
48
  if text is None:
 
53
  replaced_words = replace_words(sentence)
54
  processed_sentence = process_doubles(replaced_words)
55
  return process_transcription(processed_sentence)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  def process_transcription(input_sentence):
58
  word_to_code_map = {}
 
70
  text = transcript_to_sentence(numbers, code_to_word_map)
71
  return text
72
 
def sel_lng(lng, mic=None, file=None):
    """Transcribe one audio input with the transcriber chosen by *lng*.

    The microphone recording wins when both inputs are supplied; the uploaded
    file is used otherwise.  With neither input a fixed message is returned.
    *lng* is one of "Odiya", "Odiya-trans", "Hindi-trans" or "Hindi"; any
    other value yields ``None``.
    """
    audio = mic if mic is not None else file
    if audio is None:
        return "You must either provide a mic recording or a file"

    if lng == "Odiya":
        return transcribe_odiya(audio)
    if lng == "Odiya-trans":
        return transcribe_odiya_eng(audio)
    if lng == "Hindi-trans":
        return transcribe_hin_eng(audio)
    if lng == "Hindi":
        return transcribe_hindi(audio)
    return None
89
+
90
+ # Function to replace incorrectly spelled words
91
  def replace_words(sentence):
92
  replacements = [
93
  (r'\bjiro\b', 'zero'), (r'\bjero\b', 'zero'), (r'\bnn\b', 'one'),
 
101
  sentence = re.sub(pattern, replacement, sentence)
102
  return sentence
103
 
104
+ # Function to process "double" followed by a number
105
  def process_doubles(sentence):
106
  tokens = sentence.split()
107
  result = []
 
120
  i += 1
121
  return ' '.join(result)
122
 
123
+ # Function to generate Soundex code for a word
124
  def soundex(word):
125
  word = word.upper()
126
  word = ''.join(filter(str.isalpha, word))
 
139
  soundex_code = soundex_code.replace('0', '') + '000'
140
  return soundex_code[:4]
141
 
142
+ # Function to convert text to numerical representation
143
  def is_number(x):
144
  if type(x) == str:
145
  x = x.replace(',', '')
 
150
  return True
151
 
def text2int(textnum, numwords={}):
    """Replace runs of coded number words in *textnum* with digits.

    *textnum* is a whitespace-separated string.  Tokens that are numeric
    literals (per ``is_number``) or keys of the number table are folded into
    integers; every other token is copied to the output followed by a space.
    Two unit tokens in a row are emitted as adjacent digits (each unit closes
    the previous number without a separator), while tens/scale tokens
    combine arithmetically.

    Args:
        textnum: The coded sentence.  ``None`` or an empty string yields "".
        numwords: Table mapping token -> ``(scale, increment)``.  The mutable
            default is deliberate: it is filled on the first call and reused
            as a cache afterwards.  A caller-supplied non-empty table is used
            as is.

    Returns:
        The rewritten string.
    """
    # Index in each list is the numeric value (units) or value / 10 (tens).
    # The entries appear to be Soundex-style codes for the English number
    # words - presumably matching the file's soundex() helper; confirm.
    # NOTE(review): 'S153' occurs in both `units` (index 17) and `tens`
    # (index 7); the later `tens` entry wins, so that code always means 70.
    units = ['Z600', 'O500', 'T000', 'T600', 'F600', 'F100', 'S220', 'S150', 'E300', 'N500',
             'T500', 'E415', 'T410', 'T635', 'F635', 'F135', 'S235', 'S153', 'E235', 'N535']
    tens = ['', '', 'T537', 'T637', 'F637', 'F137', 'S230', 'S153', 'E230', 'N530']
    # Scales evaluate to 10**2, 10**3, 10**6, 10**9 in list order.
    # NOTE(review): if 'C600' stands for "crore" its value should be 10**7 - confirm.
    scales = ['H536', 'T253', 'M450', 'C600']
    # Ordinals map straight to their integer value.  They used to map to code
    # strings, which made the arithmetic below fail with int + str.
    ordinal_words = {'oh': 0, 'first': 1, 'second': 2, 'third': 3, 'fourth': 4, 'fifth': 5,
                     'sixth': 6, 'seventh': 7, 'eighth': 8, 'ninth': 9, 'twelfth': 12}
    ordinal_endings = [('ieth', 'y'), ('th', '')]

    if not numwords:
        numwords['and'] = (1, 0)
        for idx, code in enumerate(units):
            numwords[code] = (1, idx)
        for idx, code in enumerate(tens):
            # Skip the '' placeholders so an empty token is never read as 10.
            if code:
                numwords[code] = (1, idx * 10)
        for idx, code in enumerate(scales):
            numwords[code] = (10 ** (idx * 3 or 2), 0)

    if not textnum:
        return ''

    textnum = textnum.replace('-', ' ')

    current = result = 0
    curstring = ''
    onnumber = False
    lastunit = False
    lastscale = False

    def is_numword(x):
        # Test the argument itself, not the enclosing loop variable.
        return is_number(x) or x in numwords

    def from_numword(x):
        if is_number(x):
            # A literal number replaces the running value (scale 0).
            return 0, int(x.replace(',', ''))
        return numwords[x]

    for word in textnum.split():
        if word in ordinal_words:
            # Ordinals always have scale 1, so they simply add to the run.
            current += ordinal_words[word]
            onnumber = True
            lastunit = False
            lastscale = False
            continue

        # Strip an ordinal suffix only when what remains is a number token,
        # so ordinary words ending in "th" are passed through unchanged.
        for ending, replacement in ordinal_endings:
            if word.endswith(ending):
                candidate = "%s%s" % (word[:-len(ending)], replacement)
                if is_numword(candidate):
                    word = candidate
                    break

        if (not is_numword(word)) or (word == 'and' and not lastscale):
            # Plain word: flush any number in progress, then copy the word.
            if onnumber:
                curstring += str(result + current) + " "
            curstring += word + " "
            result = current = 0
            onnumber = False
            lastunit = False
            lastscale = False
            continue

        scale, increment = from_numword(word)
        onnumber = True

        # A unit directly after another unit starts a new number; the previous
        # one is emitted with no separator so digit sequences stay contiguous.
        if lastunit and (word not in scales):
            curstring += str(result + current)
            result = current = 0

        if scale > 1:
            # A bare scale word counts as one of that scale.
            current = max(1, current)

        current = current * scale + increment
        if scale > 100:
            result += current
            current = 0

        lastscale = word in scales
        lastunit = (not lastscale) and (word in units)

    if onnumber:
        curstring += str(result + current)

    return curstring
238
 
239
  # Convert sentence to transcript using Soundex
240
  def sentence_to_transcript(sentence, word_to_code_map):
 
248
 
249
  transcript = ' '.join(transcript_codes)
250
  return transcript
251
+
252
+ # Convert transcript back to sentence using mapping
def transcript_to_sentence(transcript, code_to_word_map):
    """Map each whitespace-separated code in *transcript* back to its word.

    Codes that have no entry in *code_to_word_map* are kept unchanged.  The
    resulting words are joined with single spaces.
    """
    return ' '.join(
        code_to_word_map.get(token, token) for token in transcript.split()
    )
262
+
263
+ # # Process the audio file
264
+ # transcript = pipe("./odia_recorded/AUD-20240614-WA0004.wav")
265
+ # text_value = transcript['text']
266
+ # sentence = trn.transform(text_value)
267
+ # replaced_words = replace_words(sentence)
268
+ # processed_sentence = process_doubles(replaced_words)
269
+
270
+ # input_sentence_1 = processed_sentence
271
+
# Module-level lookup tables, both empty at import time.
# The steps that would populate them (sentence_to_transcript, then text2int)
# are currently disabled, so the reverse table derived below is empty too.
word_to_code_map = {}
code_to_word_map = {code: word for word, code in word_to_code_map.items()}
284
+
285
+ # Convert transcript back to sentence
286
+ # reconstructed_sentence_1 = transcript_to_sentence(numbers, code_to_word_map)
287
+
288
+ # demo=gr.Interface(
289
+ # fn=sel_lng,
290
+
291
+ # inputs=[
292
+
293
+ # gr.Dropdown(["Hindi","Hindi-trans","Odiya","Odiya-trans"],value="Hindi",label="Select Language"),
294
+ # gr.Audio(source="microphone", type="filepath"),
295
+ # gr.Audio(source= "upload", type="filepath"),
296
+ # #gr.Audio(sources="upload", type="filepath"),
297
+ # #"state"
298
+ # ],
299
+ # outputs=[
300
+ # "textbox"
301
+ # # #"state"
302
+ # ],
303
+ # title="Automatic Speech Recognition",
304
+ # description = "Demo for Automatic Speech Recognition. Use microphone to record speech. Please press Record button. Initially it will take some time to load the model. The recognized text will appear in the output textbox",
305
+ # ).launch()
306
+
307
 
308
  ######################################################
309
  demo=gr.Interface(