Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -41,188 +41,9 @@ os.system('pip install ./indic-trans/.')
|
|
41 |
#hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "asr_demo")
|
42 |
|
43 |
from indictrans import Transliterator
|
44 |
-
trn = Transliterator(source='ori', target='eng', build_lookup=True)
|
45 |
-
def transcribe_odiya(speech):
    """Run the ``p1`` recogniser on ``speech`` and return its transcript.

    ``p1`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p1`` result, or the string
        ``"Error: ASR returned None"`` when that entry is ``None``.
    """
    transcript = p1(speech)["text"]
    return "Error: ASR returned None" if transcript is None else transcript
|
50 |
-
|
51 |
-
###################################
|
52 |
-
def transcribe_odiya_model2(speech):
    """Run the ``odia_model2`` recogniser on ``speech`` and return its text.

    ``odia_model2`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``odia_model2`` result, or the string
        ``"Error: ASR returned None"`` when that entry is ``None``.
    """
    transcript = odia_model2(speech)["text"]
    return "Error: ASR returned None" if transcript is None else transcript
|
57 |
-
|
58 |
-
def transcribe_odiya_eng_model2(speech):
    """Transcribe ``speech`` with ``odia_model2`` and transliterate it.

    The recognised text goes through an ``ori`` -> ``eng``
    ``Transliterator`` and then through ``replace_words``,
    ``process_doubles`` and ``process_transcription``, in that order.

    Returns:
        The post-processed sentence, or an error string when the ASR text
        or the transliteration is ``None``.
    """
    # Constructed before ASR runs, so construction happens on every call.
    transliterator = Transliterator(source='ori', target='eng', build_lookup=True)
    recognised = odia_model2(speech)["text"]
    if recognised is None:
        return "Error: ASR returned None"

    transliterated = transliterator.transform(recognised)
    if transliterated is None:
        return "Error: Transliteration returned None"

    return process_transcription(process_doubles(replace_words(transliterated)))
|
69 |
-
|
70 |
-
########################################
|
71 |
-
def cleanhtml(raw_html):
    """Strip ``<...>`` tag-like spans from ``raw_html``.

    Args:
        raw_html: Text that may contain markup, or ``None``.

    Returns:
        ``raw_html`` with every shortest ``<...>`` match removed, or
        ``None`` when ``raw_html`` is ``None``.
    """
    # Pass None through rather than letting re.sub() raise TypeError, so a
    # caller can test the result for None.
    if raw_html is None:
        return None
    # Non-greedy so each tag is removed separately; "." does not cross
    # newlines, so a "<" ... ">" pair split over lines is left as-is.
    return re.sub(r'<.*?>', '', raw_html)
|
74 |
-
|
75 |
-
def transcribe_hindi(speech):
    """Run the ``p2`` recogniser on ``speech`` and return its transcript.

    ``p2`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p2`` result, or the string
        ``"Error: ASR returned None"`` when that entry is ``None``.
    """
    transcript = p2(speech)["text"]
    return "Error: ASR returned None" if transcript is None else transcript
|
80 |
-
def transcribe_kannada(speech):
    """Run the ``p3`` recogniser on ``speech`` and return its transcript.

    ``p3`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p3`` result, or the string
        ``"Error: ASR returned None"`` when that entry is ``None``.
    """
    transcript = p3(speech)["text"]
    return "Error: ASR returned None" if transcript is None else transcript
|
85 |
-
def transcribe_telugu(speech):
    """Run the ``p4`` recogniser on ``speech`` and return its transcript.

    ``p4`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p4`` result, or the string
        ``"Error: ASR returned None"`` when that entry is ``None``.
    """
    transcript = p4(speech)["text"]
    return "Error: ASR returned None" if transcript is None else transcript
|
90 |
-
|
91 |
-
def transcribe_bangala(speech):
    """Run the ``p5`` recogniser on ``speech`` and return its transcript.

    ``p5`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p5`` result, or the string
        ``"Error: ASR returned None"`` when that entry is ``None``.
    """
    transcript = p5(speech)["text"]
    return "Error: ASR returned None" if transcript is None else transcript
|
96 |
-
|
97 |
-
def transcribe_assamese_LM(speech):
    """Transcribe ``speech`` with ``p8`` and strip markup from the result.

    ``p8`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p8`` result after ``cleanhtml``, or
        the string ``"Error: ASR returned None"`` when that entry is
        ``None``.
    """
    text = p8(speech)["text"]
    # Validate the ASR output before stripping markup, so a missing
    # transcript yields the error string instead of reaching cleanhtml().
    if text is None:
        return "Error: ASR returned None"
    return cleanhtml(text)
|
103 |
-
|
104 |
-
def transcribe_assamese_model2(speech):
    """Transcribe ``speech`` with ``p7`` and strip markup from the result.

    ``p7`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p7`` result after ``cleanhtml``, or
        the string ``"Error: ASR returned None"`` when that entry is
        ``None``.
    """
    text = p7(speech)["text"]
    # Validate the ASR output before stripping markup, so a missing
    # transcript yields the error string instead of reaching cleanhtml().
    if text is None:
        return "Error: ASR returned None"
    return cleanhtml(text)
|
110 |
-
|
111 |
-
def transcribe_odiya_eng(speech):
    """Transcribe ``speech`` with ``p1`` and transliterate it.

    The recognised text goes through an ``ori`` -> ``eng``
    ``Transliterator`` and then through ``replace_words``,
    ``process_doubles`` and ``process_transcription``, in that order.

    Returns:
        The post-processed sentence, or an error string when the ASR text
        or the transliteration is ``None``.
    """
    # Constructed before ASR runs, so construction happens on every call.
    transliterator = Transliterator(source='ori', target='eng', build_lookup=True)
    recognised = p1(speech)["text"]
    if recognised is None:
        return "Error: ASR returned None"

    transliterated = transliterator.transform(recognised)
    if transliterated is None:
        return "Error: Transliteration returned None"

    return process_transcription(process_doubles(replace_words(transliterated)))
|
122 |
-
|
123 |
-
def transcribe_ban_eng(speech):
    """Transcribe ``speech`` with ``p5`` and transliterate it.

    The recognised text goes through a ``ben`` -> ``eng``
    ``Transliterator`` and then through ``replace_words``,
    ``process_doubles`` and ``process_transcription``, in that order.

    Returns:
        The post-processed sentence, or an error string when the ASR text
        or the transliteration is ``None``.
    """
    # Constructed before ASR runs, so construction happens on every call.
    transliterator = Transliterator(source='ben', target='eng', build_lookup=True)
    recognised = p5(speech)["text"]
    if recognised is None:
        return "Error: ASR returned None"

    transliterated = transliterator.transform(recognised)
    if transliterated is None:
        return "Error: Transliteration returned None"

    return process_transcription(process_doubles(replace_words(transliterated)))
|
134 |
-
|
135 |
-
def transcribe_hin_eng(speech):
    """Transcribe ``speech`` with ``p2`` and transliterate it.

    The recognised text goes through a ``hin`` -> ``eng``
    ``Transliterator`` and then through ``replace_words``,
    ``process_doubles`` and ``process_transcription``, in that order.

    Returns:
        The post-processed sentence, or an error string when the ASR text
        or the transliteration is ``None``.
    """
    # Constructed before ASR runs, so construction happens on every call.
    transliterator = Transliterator(source='hin', target='eng', build_lookup=True)
    recognised = p2(speech)["text"]
    if recognised is None:
        return "Error: ASR returned None"

    transliterated = transliterator.transform(recognised)
    if transliterated is None:
        return "Error: Transliteration returned None"

    return process_transcription(process_doubles(replace_words(transliterated)))
|
146 |
-
|
147 |
-
def transcribe_kan_eng(speech):
    """Transcribe ``speech`` with ``p3`` and transliterate it.

    The recognised text goes through a ``kan`` -> ``eng``
    ``Transliterator`` and then through ``replace_words``,
    ``process_doubles`` and ``process_transcription``, in that order.

    Returns:
        The post-processed sentence, or an error string when the ASR text
        or the transliteration is ``None``.
    """
    # Constructed before ASR runs, so construction happens on every call.
    transliterator = Transliterator(source='kan', target='eng', build_lookup=True)
    recognised = p3(speech)["text"]
    if recognised is None:
        return "Error: ASR returned None"

    transliterated = transliterator.transform(recognised)
    if transliterated is None:
        return "Error: Transliteration returned None"

    return process_transcription(process_doubles(replace_words(transliterated)))
|
158 |
-
|
159 |
-
def transcribe_tel_eng(speech):
    """Transcribe ``speech`` with ``p4`` and transliterate it.

    The recognised text goes through a ``tel`` -> ``eng``
    ``Transliterator`` and then through ``replace_words``,
    ``process_doubles`` and ``process_transcription``, in that order.

    Returns:
        The post-processed sentence, or an error string when the ASR text
        or the transliteration is ``None``.
    """
    # Constructed before ASR runs, so construction happens on every call.
    transliterator = Transliterator(source='tel', target='eng', build_lookup=True)
    recognised = p4(speech)["text"]
    if recognised is None:
        return "Error: ASR returned None"

    transliterated = transliterator.transform(recognised)
    if transliterated is None:
        return "Error: Transliteration returned None"

    return process_transcription(process_doubles(replace_words(transliterated)))
|
170 |
-
|
171 |
|
172 |
-
|
173 |
-
word_to_code_map = {}
|
174 |
-
code_to_word_map = {}
|
175 |
-
|
176 |
-
transcript_1 = sentence_to_transcript(input_sentence, word_to_code_map)
|
177 |
-
if transcript_1 is None:
|
178 |
-
return "Error: Transcript conversion returned None"
|
179 |
-
|
180 |
-
numbers = text2int(transcript_1)
|
181 |
-
if numbers is None:
|
182 |
-
return "Error: Text to number conversion returned None"
|
183 |
|
184 |
-
code_to_word_map = {v: k for k, v in word_to_code_map.items()}
|
185 |
-
text = transcript_to_sentence(numbers, code_to_word_map)
|
186 |
-
return text
|
187 |
-
|
188 |
-
def sel_lng(lng, mic=None, file=None):
    """Route an audio input to the transcriber selected by ``lng``.

    Args:
        lng: Language option label, e.g. ``"Odiya"`` or ``"Hindi-trans"``.
        mic: Microphone recording; used in preference to ``file`` when it
            is not ``None``.
        file: Uploaded audio, used only when ``mic`` is ``None``.

    Returns:
        The selected transcriber's return value, or an error message
        string when no audio was supplied or ``lng`` is not a known label.
    """
    if mic is not None:
        audio = mic
    elif file is not None:
        audio = file
    else:
        return "You must either provide a mic recording or a file"

    # Label -> transcriber table; labels are matched exactly.
    transcribers = {
        "Odiya": transcribe_odiya,
        "Odiya-trans": transcribe_odiya_eng,
        "Hindi-trans": transcribe_hin_eng,
        "Hindi": transcribe_hindi,
        "Kannada-trans": transcribe_kan_eng,
        "Kannada": transcribe_kannada,
        "Telugu-trans": transcribe_tel_eng,
        "Telugu": transcribe_telugu,
        "Bangala-trans": transcribe_ban_eng,
        "Bangala": transcribe_bangala,
        "Assamese-LM": transcribe_assamese_LM,
        "Assamese-Model2": transcribe_assamese_model2,
        "Odia_model2": transcribe_odiya_model2,
        "Odia_trans_model2": transcribe_odiya_eng_model2,
    }
    # Non-string labels cannot match any key; skip the lookup so an
    # unhashable value does not raise.
    transcriber = transcribers.get(lng) if isinstance(lng, str) else None
    if transcriber is None:
        # An unknown label gets an explicit message instead of an
        # implicit None.
        return "Error: Unsupported language option: {}".format(lng)
    return transcriber(audio)
|
224 |
-
|
225 |
-
|
226 |
# Function to replace incorrectly spelled words
|
227 |
def replace_words(sentence):
|
228 |
replacements = [
|
@@ -425,6 +246,191 @@ code_to_word_map = {}
|
|
425 |
# Create reverse mapping
|
426 |
code_to_word_map = {v: k for k, v in word_to_code_map.items()}
|
427 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
428 |
# Convert transcript back to sentence
|
429 |
# reconstructed_sentence_1 = transcript_to_sentence(numbers, code_to_word_map)
|
430 |
|
|
|
41 |
#hf_writer = gr.HuggingFaceDatasetSaver(HF_TOKEN, "asr_demo")
|
42 |
|
43 |
from indictrans import Transliterator
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
|
45 |
+
###########################################
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
# Function to replace incorrectly spelled words
|
48 |
def replace_words(sentence):
|
49 |
replacements = [
|
|
|
246 |
# Create reverse mapping
|
247 |
code_to_word_map = {v: k for k, v in word_to_code_map.items()}
|
248 |
|
249 |
+
###########################################
|
250 |
+
|
251 |
+
def transcribe_odiya(speech):
    """Run the ``p1`` recogniser on ``speech`` and return its transcript.

    ``p1`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p1`` result, or the string
        ``"Error: ASR returned None"`` when that entry is ``None``.
    """
    transcript = p1(speech)["text"]
    return "Error: ASR returned None" if transcript is None else transcript
|
256 |
+
|
257 |
+
###################################
|
258 |
+
def transcribe_odiya_model2(speech):
    """Run the ``odia_model2`` recogniser on ``speech`` and return its text.

    ``odia_model2`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``odia_model2`` result, or the string
        ``"Error: ASR returned None"`` when that entry is ``None``.
    """
    transcript = odia_model2(speech)["text"]
    return "Error: ASR returned None" if transcript is None else transcript
|
263 |
+
|
264 |
+
def transcribe_odiya_eng_model2(speech):
    """Transcribe ``speech`` with ``odia_model2`` and transliterate it.

    The recognised text goes through an ``ori`` -> ``eng``
    ``Transliterator`` and then through ``replace_words``,
    ``process_doubles`` and ``process_transcription``, in that order.

    Returns:
        The post-processed sentence, or an error string when the ASR text
        or the transliteration is ``None``.
    """
    # Constructed before ASR runs, so construction happens on every call.
    transliterator = Transliterator(source='ori', target='eng', build_lookup=True)
    recognised = odia_model2(speech)["text"]
    if recognised is None:
        return "Error: ASR returned None"

    transliterated = transliterator.transform(recognised)
    if transliterated is None:
        return "Error: Transliteration returned None"

    return process_transcription(process_doubles(replace_words(transliterated)))
|
275 |
+
|
276 |
+
########################################
|
277 |
+
def cleanhtml(raw_html):
    """Strip ``<...>`` tag-like spans from ``raw_html``.

    Args:
        raw_html: Text that may contain markup, or ``None``.

    Returns:
        ``raw_html`` with every shortest ``<...>`` match removed, or
        ``None`` when ``raw_html`` is ``None``.
    """
    # Pass None through rather than letting re.sub() raise TypeError, so a
    # caller can test the result for None.
    if raw_html is None:
        return None
    # Non-greedy so each tag is removed separately; "." does not cross
    # newlines, so a "<" ... ">" pair split over lines is left as-is.
    return re.sub(r'<.*?>', '', raw_html)
|
280 |
+
|
281 |
+
def transcribe_hindi(speech):
    """Run the ``p2`` recogniser on ``speech`` and return its transcript.

    ``p2`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p2`` result, or the string
        ``"Error: ASR returned None"`` when that entry is ``None``.
    """
    transcript = p2(speech)["text"]
    return "Error: ASR returned None" if transcript is None else transcript
|
286 |
+
def transcribe_kannada(speech):
    """Run the ``p3`` recogniser on ``speech`` and return its transcript.

    ``p3`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p3`` result, or the string
        ``"Error: ASR returned None"`` when that entry is ``None``.
    """
    transcript = p3(speech)["text"]
    return "Error: ASR returned None" if transcript is None else transcript
|
291 |
+
def transcribe_telugu(speech):
    """Run the ``p4`` recogniser on ``speech`` and return its transcript.

    ``p4`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p4`` result, or the string
        ``"Error: ASR returned None"`` when that entry is ``None``.
    """
    transcript = p4(speech)["text"]
    return "Error: ASR returned None" if transcript is None else transcript
|
296 |
+
|
297 |
+
def transcribe_bangala(speech):
    """Run the ``p5`` recogniser on ``speech`` and return its transcript.

    ``p5`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p5`` result, or the string
        ``"Error: ASR returned None"`` when that entry is ``None``.
    """
    transcript = p5(speech)["text"]
    return "Error: ASR returned None" if transcript is None else transcript
|
302 |
+
|
303 |
+
def transcribe_assamese_LM(speech):
    """Transcribe ``speech`` with ``p8`` and strip markup from the result.

    ``p8`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p8`` result after ``cleanhtml``, or
        the string ``"Error: ASR returned None"`` when that entry is
        ``None``.
    """
    text = p8(speech)["text"]
    # Validate the ASR output before stripping markup, so a missing
    # transcript yields the error string instead of reaching cleanhtml().
    if text is None:
        return "Error: ASR returned None"
    return cleanhtml(text)
|
309 |
+
|
310 |
+
def transcribe_assamese_model2(speech):
    """Transcribe ``speech`` with ``p7`` and strip markup from the result.

    ``p7`` is defined elsewhere in the file.

    Returns:
        The ``"text"`` entry of the ``p7`` result after ``cleanhtml``, or
        the string ``"Error: ASR returned None"`` when that entry is
        ``None``.
    """
    text = p7(speech)["text"]
    # Validate the ASR output before stripping markup, so a missing
    # transcript yields the error string instead of reaching cleanhtml().
    if text is None:
        return "Error: ASR returned None"
    return cleanhtml(text)
|
316 |
+
|
317 |
+
def transcribe_odiya_eng(speech):
    """Transcribe ``speech`` with ``p1`` and transliterate it.

    The recognised text goes through an ``ori`` -> ``eng``
    ``Transliterator`` and then through ``replace_words``,
    ``process_doubles`` and ``process_transcription``, in that order.

    Returns:
        The post-processed sentence, or an error string when the ASR text
        or the transliteration is ``None``.
    """
    # Constructed before ASR runs, so construction happens on every call.
    transliterator = Transliterator(source='ori', target='eng', build_lookup=True)
    recognised = p1(speech)["text"]
    if recognised is None:
        return "Error: ASR returned None"

    transliterated = transliterator.transform(recognised)
    if transliterated is None:
        return "Error: Transliteration returned None"

    return process_transcription(process_doubles(replace_words(transliterated)))
|
328 |
+
|
329 |
+
def transcribe_ban_eng(speech):
    """Transcribe ``speech`` with ``p5`` and transliterate it.

    The recognised text goes through a ``ben`` -> ``eng``
    ``Transliterator`` and then through ``replace_words``,
    ``process_doubles`` and ``process_transcription``, in that order.

    Returns:
        The post-processed sentence, or an error string when the ASR text
        or the transliteration is ``None``.
    """
    # Constructed before ASR runs, so construction happens on every call.
    transliterator = Transliterator(source='ben', target='eng', build_lookup=True)
    recognised = p5(speech)["text"]
    if recognised is None:
        return "Error: ASR returned None"

    transliterated = transliterator.transform(recognised)
    if transliterated is None:
        return "Error: Transliteration returned None"

    return process_transcription(process_doubles(replace_words(transliterated)))
|
340 |
+
|
341 |
+
def transcribe_hin_eng(speech):
    """Transcribe ``speech`` with ``p2`` and transliterate it.

    The recognised text goes through a ``hin`` -> ``eng``
    ``Transliterator`` and then through ``replace_words``,
    ``process_doubles`` and ``process_transcription``, in that order.

    Returns:
        The post-processed sentence, or an error string when the ASR text
        or the transliteration is ``None``.
    """
    # Constructed before ASR runs, so construction happens on every call.
    transliterator = Transliterator(source='hin', target='eng', build_lookup=True)
    recognised = p2(speech)["text"]
    if recognised is None:
        return "Error: ASR returned None"

    transliterated = transliterator.transform(recognised)
    if transliterated is None:
        return "Error: Transliteration returned None"

    return process_transcription(process_doubles(replace_words(transliterated)))
|
352 |
+
|
353 |
+
def transcribe_kan_eng(speech):
    """Transcribe ``speech`` with ``p3`` and transliterate it.

    The recognised text goes through a ``kan`` -> ``eng``
    ``Transliterator`` and then through ``replace_words``,
    ``process_doubles`` and ``process_transcription``, in that order.

    Returns:
        The post-processed sentence, or an error string when the ASR text
        or the transliteration is ``None``.
    """
    # Constructed before ASR runs, so construction happens on every call.
    transliterator = Transliterator(source='kan', target='eng', build_lookup=True)
    recognised = p3(speech)["text"]
    if recognised is None:
        return "Error: ASR returned None"

    transliterated = transliterator.transform(recognised)
    if transliterated is None:
        return "Error: Transliteration returned None"

    return process_transcription(process_doubles(replace_words(transliterated)))
|
364 |
+
|
365 |
+
def transcribe_tel_eng(speech):
    """Transcribe ``speech`` with ``p4`` and transliterate it.

    The recognised text goes through a ``tel`` -> ``eng``
    ``Transliterator`` and then through ``replace_words``,
    ``process_doubles`` and ``process_transcription``, in that order.

    Returns:
        The post-processed sentence, or an error string when the ASR text
        or the transliteration is ``None``.
    """
    # Constructed before ASR runs, so construction happens on every call.
    transliterator = Transliterator(source='tel', target='eng', build_lookup=True)
    recognised = p4(speech)["text"]
    if recognised is None:
        return "Error: ASR returned None"

    transliterated = transliterator.transform(recognised)
    if transliterated is None:
        return "Error: Transliteration returned None"

    return process_transcription(process_doubles(replace_words(transliterated)))
|
376 |
+
|
377 |
+
|
378 |
+
def process_transcription(input_sentence):
    """Post-process a sentence through the transcript/number helpers.

    The sentence is converted with ``sentence_to_transcript``, the result
    is passed to ``text2int``, and the outcome is turned back into a
    sentence with ``transcript_to_sentence`` using the inverse of the
    word -> code map.

    Returns:
        The reconstructed sentence, or an error string when either
        intermediate helper returns ``None``.
    """
    # Fresh per-call map handed to sentence_to_transcript (which presumably
    # fills it in -- its inverse is only built after that call).
    codes_by_word = {}

    coded = sentence_to_transcript(input_sentence, codes_by_word)
    if coded is None:
        return "Error: Transcript conversion returned None"

    converted = text2int(coded)
    if converted is None:
        return "Error: Text to number conversion returned None"

    words_by_code = {code: word for word, code in codes_by_word.items()}
    return transcript_to_sentence(converted, words_by_code)
|
393 |
+
|
394 |
+
def sel_lng(lng, mic=None, file=None):
    """Route an audio input to the transcriber selected by ``lng``.

    Args:
        lng: Language option label, e.g. ``"Odiya"`` or ``"Hindi-trans"``.
        mic: Microphone recording; used in preference to ``file`` when it
            is not ``None``.
        file: Uploaded audio, used only when ``mic`` is ``None``.

    Returns:
        The selected transcriber's return value, or an error message
        string when no audio was supplied or ``lng`` is not a known label.
    """
    if mic is not None:
        audio = mic
    elif file is not None:
        audio = file
    else:
        return "You must either provide a mic recording or a file"

    # Label -> transcriber table; labels are matched exactly.
    transcribers = {
        "Odiya": transcribe_odiya,
        "Odiya-trans": transcribe_odiya_eng,
        "Hindi-trans": transcribe_hin_eng,
        "Hindi": transcribe_hindi,
        "Kannada-trans": transcribe_kan_eng,
        "Kannada": transcribe_kannada,
        "Telugu-trans": transcribe_tel_eng,
        "Telugu": transcribe_telugu,
        "Bangala-trans": transcribe_ban_eng,
        "Bangala": transcribe_bangala,
        "Assamese-LM": transcribe_assamese_LM,
        "Assamese-Model2": transcribe_assamese_model2,
        "Odia_model2": transcribe_odiya_model2,
        "Odia_trans_model2": transcribe_odiya_eng_model2,
    }
    # Non-string labels cannot match any key; skip the lookup so an
    # unhashable value does not raise.
    transcriber = transcribers.get(lng) if isinstance(lng, str) else None
    if transcriber is None:
        # An unknown label gets an explicit message instead of an
        # implicit None.
        return "Error: Unsupported language option: {}".format(lng)
    return transcriber(audio)
|
430 |
+
|
431 |
+
|
432 |
+
|
433 |
+
|
434 |
# Convert transcript back to sentence
|
435 |
# reconstructed_sentence_1 = transcript_to_sentence(numbers, code_to_word_map)
|
436 |
|