cdactvm committed
Commit c7b93d7 · verified · 1 Parent(s): 419f2ee

Update Tamil_number_conversion.py

Files changed (1)
  1. Tamil_number_conversion.py +65 -78
Tamil_number_conversion.py CHANGED
@@ -1,78 +1,65 @@
- #!/usr/bin/env python
- # coding: utf-8
-
- # In[1]:
-
-
- import gradio as gr
- import librosa
- import numpy as np
- import pywt
- import nbimporter
- from scipy.signal import butter, lfilter, wiener
- from scipy.io.wavfile import write
- from transformers import pipeline
- from text2int import text_to_int
- from isNumber import is_number
- from Text2List import text_to_list
- from convert2list import convert_to_list
- from processDoubles import process_doubles
- from replaceWords import replace_words
-
- asr_model = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
-
- # Function to apply a high-pass filter
- def high_pass_filter(audio, sr, cutoff=300):
-     nyquist = 0.5 * sr
-     normal_cutoff = cutoff / nyquist
-     b, a = butter(1, normal_cutoff, btype='high', analog=False)
-     filtered_audio = lfilter(b, a, audio)
-     return filtered_audio
-
- # Function to apply wavelet denoising
- def wavelet_denoise(audio, wavelet='db1', level=1):
-     coeffs = pywt.wavedec(audio, wavelet, mode='per')
-     sigma = np.median(np.abs(coeffs[-level])) / 0.5
-     uthresh = sigma * np.sqrt(2 * np.log(len(audio)))
-     coeffs[1:] = [pywt.threshold(i, value=uthresh, mode='soft') for i in coeffs[1:]]
-     return pywt.waverec(coeffs, wavelet, mode='per')
-
- # Function to apply a Wiener filter for noise reduction
- def apply_wiener_filter(audio):
-     return wiener(audio)
-
- # Function to handle speech recognition
- def recognize_speech(audio_file):
-     audio, sr = librosa.load(audio_file, sr=16000)
-     audio = high_pass_filter(audio, sr)
-     audio = apply_wiener_filter(audio)
-     denoised_audio = wavelet_denoise(audio)
-     result = asr_model(denoised_audio)
-     text_value = result['text']
-     cleaned_text = text_value.replace("<s>", "")
-     print(cleaned_text)
-     converted_to_list = convert_to_list(cleaned_text, text_to_list())
-     print(converted_to_list)
-     processed_doubles = process_doubles(converted_to_list)
-     print(processed_doubles)
-     replaced_words = replace_words(processed_doubles)
-     print(replaced_words)
-     converted_text = text_to_int(replaced_words)
-     print(converted_text)
-     return converted_text
-
- # Gradio Interface
- gr.Interface(
-     fn=recognize_speech,
-     inputs=gr.Audio(sources=["microphone","upload"], type="filepath"),
-     outputs="text",
-     title="Speech Recognition with Advanced Noise Reduction & Hindi ASR",
-     description="Upload an audio file, and the system will use high-pass filtering, Wiener filtering, and wavelet-based denoising, then a Hindi ASR model will transcribe the clean audio."
- ).launch()
-
-
- # In[ ]:
-
-
-
-
+ import gradio as gr
+ import librosa
+ import numpy as np
+ import pywt
+ import nbimporter
+ from scipy.signal import butter, lfilter, wiener
+ from scipy.io.wavfile import write
+ from transformers import pipeline
+ from text2int import text_to_int
+ from isNumber import is_number
+ from Text2List import text_to_list
+ from convert2list import convert_to_list
+ from processDoubles import process_doubles
+ from replaceWords import replace_words
+
+ asr_model = pipeline("automatic-speech-recognition", model="cdactvm/w2v-bert-tamil_new")
+
+ # Function to apply a high-pass filter
+ def high_pass_filter(audio, sr, cutoff=300):
+     nyquist = 0.5 * sr
+     normal_cutoff = cutoff / nyquist
+     b, a = butter(1, normal_cutoff, btype='high', analog=False)
+     filtered_audio = lfilter(b, a, audio)
+     return filtered_audio
+
+ # Function to apply wavelet denoising
+ def wavelet_denoise(audio, wavelet='db1', level=1):
+     coeffs = pywt.wavedec(audio, wavelet, mode='per')
+     sigma = np.median(np.abs(coeffs[-level])) / 0.5
+     uthresh = sigma * np.sqrt(2 * np.log(len(audio)))
+     coeffs[1:] = [pywt.threshold(i, value=uthresh, mode='soft') for i in coeffs[1:]]
+     return pywt.waverec(coeffs, wavelet, mode='per')
+
+ # Function to apply a Wiener filter for noise reduction
+ def apply_wiener_filter(audio):
+     return wiener(audio)
+
+ # Function to handle speech recognition
+ def recognize_speech(audio_file):
+     audio, sr = librosa.load(audio_file, sr=16000)
+     audio = high_pass_filter(audio, sr)
+     audio = apply_wiener_filter(audio)
+     denoised_audio = wavelet_denoise(audio)
+     result = asr_model(denoised_audio)
+     text_value = result['text']
+     cleaned_text = text_value.replace("<s>", "")
+     print(cleaned_text)
+     converted_to_list = convert_to_list(cleaned_text, text_to_list())
+     print(converted_to_list)
+     processed_doubles = process_doubles(converted_to_list)
+     print(processed_doubles)
+     replaced_words = replace_words(processed_doubles)
+     print(replaced_words)
+     converted_text = text_to_int(replaced_words)
+     print(converted_text)
+     return converted_text
+
+ # Gradio Interface
+ gr.Interface(
+     fn=recognize_speech,
+     inputs=gr.Audio(sources=["microphone","upload"], type="filepath"),
+     outputs="text",
+     title="Speech Recognition with Advanced Noise Reduction & Hindi ASR",
+     description="Upload an audio file, and the system will use high-pass filtering, Wiener filtering, and wavelet-based denoising, then a Hindi ASR model will transcribe the clean audio."
+ ).launch()
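
For reference, the denoising chain that recognize_speech() applies before transcription (first-order 300 Hz Butterworth high-pass, then a Wiener filter, then wavelet soft-thresholding) can be exercised on its own, without loading the ASR model or launching Gradio. The sketch below is a standalone copy of those helpers run on a synthetic noisy tone; the 16 kHz rate matches librosa.load(..., sr=16000) in the file, while the test signal and noise level are illustrative assumptions only.

# Standalone sketch of the denoising chain from Tamil_number_conversion.py.
# The synthetic test tone below is an illustrative assumption, not part of the Space.
import numpy as np
import pywt
from scipy.signal import butter, lfilter, wiener

def high_pass_filter(audio, sr, cutoff=300):
    # First-order Butterworth high-pass with the same settings as the Space.
    nyquist = 0.5 * sr
    b, a = butter(1, cutoff / nyquist, btype='high', analog=False)
    return lfilter(b, a, audio)

def wavelet_denoise(audio, wavelet='db1', level=1):
    # Soft-threshold the wavelet detail coefficients, same settings as the Space.
    coeffs = pywt.wavedec(audio, wavelet, mode='per')
    sigma = np.median(np.abs(coeffs[-level])) / 0.5
    uthresh = sigma * np.sqrt(2 * np.log(len(audio)))
    coeffs[1:] = [pywt.threshold(c, value=uthresh, mode='soft') for c in coeffs[1:]]
    return pywt.waverec(coeffs, wavelet, mode='per')

if __name__ == "__main__":
    sr = 16000                                  # matches librosa.load(..., sr=16000)
    t = np.arange(sr) / sr                      # one second of audio
    noisy = np.sin(2 * np.pi * 440 * t) + 0.3 * np.random.randn(sr)

    cleaned = high_pass_filter(noisy, sr)
    cleaned = wiener(cleaned)                   # scipy's adaptive Wiener filter
    cleaned = wavelet_denoise(cleaned)

    print("input  RMS:", float(np.sqrt(np.mean(noisy ** 2))))
    print("output RMS:", float(np.sqrt(np.mean(cleaned ** 2))))

In recognize_speech() the cleaned NumPy array is handed straight to the transformers pipeline, which treats a bare array as audio at the model's expected rate; the pipeline also accepts a {"raw": array, "sampling_rate": sr} dict when the sampling rate needs to be stated explicitly.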