Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -47,8 +47,8 @@ def createlex(filename):
|
|
47 |
data_dict[key] = value
|
48 |
return data_dict
|
49 |
|
50 |
-
|
51 |
-
|
52 |
def addnum(inlist):
|
53 |
sum=0
|
54 |
for num in inlist:
|
@@ -76,7 +76,7 @@ def get_val(word, lexicon):
|
|
76 |
return None
|
77 |
else:
|
78 |
return None
|
79 |
-
def
|
80 |
input += " #" # Add a period for termination
|
81 |
words = input.split()
|
82 |
i = 0
|
@@ -131,6 +131,81 @@ def convert2num(input, lex):
|
|
131 |
# Final processing
|
132 |
outstr = outstr.replace('#','') # Remove trailing spaces
|
133 |
return outstr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
# Function to apply a high-pass filter
|
135 |
def high_pass_filter(audio, sr, cutoff=300):
|
136 |
nyquist = 0.5 * sr
|
@@ -163,7 +238,7 @@ def recognize_speech_telugu(audio_file):
|
|
163 |
text_value = result['text']
|
164 |
print (text_value)
|
165 |
cleaned_text = text_value.replace("<s>", "")
|
166 |
-
converted_text=
|
167 |
# cleaned_text=convert2num(cleaned_text,lex)
|
168 |
# converted_to_list = convert_to_list(cleaned_text, text_to_list())
|
169 |
# processed_doubles = process_doubles(converted_to_list)
|
@@ -181,6 +256,7 @@ def recognize_speech_kannada(audio_file):
|
|
181 |
result = asr_model_kannada(denoised_audio)
|
182 |
text_value = result['text']
|
183 |
cleaned_text = text_value.replace("[UNK]", "")
|
|
|
184 |
#converted_text=convert2num(cleaned_text,lex)
|
185 |
# cleaned_text=convert2num(cleaned_text,lex)
|
186 |
# converted_to_list = convert_to_list(cleaned_text, text_to_list())
|
|
|
47 |
data_dict[key] = value
|
48 |
return data_dict
|
49 |
|
50 |
+
tellex=createlex("num_words_tel.txt")
|
51 |
+
kanlex=createlex("num_words_kn.txt")
|
52 |
def addnum(inlist):
|
53 |
sum=0
|
54 |
for num in inlist:
|
|
|
76 |
return None
|
77 |
else:
|
78 |
return None
|
79 |
+
def convert2numtel(input, lex):
|
80 |
input += " #" # Add a period for termination
|
81 |
words = input.split()
|
82 |
i = 0
|
|
|
131 |
# Final processing
|
132 |
outstr = outstr.replace('#','') # Remove trailing spaces
|
133 |
return outstr
|
134 |
+
|
135 |
+
def convert2numkn(input, lex):
    """Rewrite number words in ``input`` as numeric values looked up in ``lex``.

    The text is split on whitespace and processed one word at a time:

    * A word for which ``get_val(word, lex)`` returns a value is not written
      out immediately; its value is held in a pending list.
    * Otherwise the last (up to) three words, including the current one, are
      joined with single spaces and looked up as a phrase.  On a hit the
      pending values are discarded, the last word already written to the
      output is dropped, and the phrase value is written instead.
    * Otherwise, if values are pending, ``addnum(pending)`` is written
      followed by the current word; if nothing is pending the word is
      written unchanged.

    Args:
        input: Text to convert (a ``str``; the name shadows the builtin but
            is kept so existing callers are unaffected).
        lex: Lexicon object passed straight through to ``get_val``.
            NOTE(review): presumably the dict built by ``createlex`` for
            Kannada (the visible caller passes ``kanlex``) -- confirm.

    Returns:
        The converted text.  Tokens are separated by spaces and the string
        may carry leading, trailing or doubled spaces; every ``#`` character
        is removed, including any that were present in ``input``.
    """
    from collections import deque

    # A sentinel token is appended so that number words at the very end of
    # the text are still followed by a non-number token, which is what
    # triggers the flush of the pending values.
    words = (input + " ######").split()

    outstr = ""
    pending = []                  # values of number words not yet written out
    window = deque(maxlen=3)      # the current word and up to two predecessors

    for word in words:
        numval = get_val(word, lex)
        window.append(word)

        if numval is not None:
            pending.append(numval)
            continue

        # Not a number word on its own: try the recent words as one phrase.
        phrase_val = get_val(" ".join(window), lex)
        if phrase_val is not None:
            # The phrase value supersedes both the pending values and the
            # most recently emitted output word.
            pending = []
            outstr = " ".join(outstr.split()[:-1])
            outstr += " " + str(phrase_val) + " "
        elif pending:
            outstr += str(addnum(pending)) + " " + word + " "
            pending = []
        else:
            outstr += word + " "

    # Strip the sentinel characters (this does not trim whitespace).
    return outstr.replace('#', '')
|
208 |
+
|
209 |
# Function to apply a high-pass filter
|
210 |
def high_pass_filter(audio, sr, cutoff=300):
|
211 |
nyquist = 0.5 * sr
|
|
|
238 |
text_value = result['text']
|
239 |
print (text_value)
|
240 |
cleaned_text = text_value.replace("<s>", "")
|
241 |
+
converted_text=convert2numtel(cleaned_text,tellex)
|
242 |
# cleaned_text=convert2num(cleaned_text,lex)
|
243 |
# converted_to_list = convert_to_list(cleaned_text, text_to_list())
|
244 |
# processed_doubles = process_doubles(converted_to_list)
|
|
|
256 |
result = asr_model_kannada(denoised_audio)
|
257 |
text_value = result['text']
|
258 |
cleaned_text = text_value.replace("[UNK]", "")
|
259 |
+
converted_text=convert2numkn(cleaned_text,kanlex)
|
260 |
#converted_text=convert2num(cleaned_text,lex)
|
261 |
# cleaned_text=convert2num(cleaned_text,lex)
|
262 |
# converted_to_list = convert_to_list(cleaned_text, text_to_list())
|