cdactvm committed on
Commit
a002fb8
·
verified ·
1 Parent(s): 0899121

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +80 -4
app.py CHANGED
@@ -47,8 +47,8 @@ def createlex(filename):
47
  data_dict[key] = value
48
  return data_dict
49
 
50
- lex=createlex("num_words_tel.txt")
51
-
52
  def addnum(inlist):
53
  sum=0
54
  for num in inlist:
@@ -76,7 +76,7 @@ def get_val(word, lexicon):
76
  return None
77
  else:
78
  return None
79
- def convert2num(input, lex):
80
  input += " #" # Add a period for termination
81
  words = input.split()
82
  i = 0
@@ -131,6 +131,81 @@ def convert2num(input, lex):
131
  # Final processing
132
  outstr = outstr.replace('#','') # Remove trailing spaces
133
  return outstr
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
134
  # Function to apply a high-pass filter
135
  def high_pass_filter(audio, sr, cutoff=300):
136
  nyquist = 0.5 * sr
@@ -163,7 +238,7 @@ def recognize_speech_telugu(audio_file):
163
  text_value = result['text']
164
  print (text_value)
165
  cleaned_text = text_value.replace("<s>", "")
166
- converted_text=convert2num(cleaned_text,lex)
167
  # cleaned_text=convert2num(cleaned_text,lex)
168
  # converted_to_list = convert_to_list(cleaned_text, text_to_list())
169
  # processed_doubles = process_doubles(converted_to_list)
@@ -181,6 +256,7 @@ def recognize_speech_kannada(audio_file):
181
  result = asr_model_kannada(denoised_audio)
182
  text_value = result['text']
183
  cleaned_text = text_value.replace("[UNK]", "")
 
184
  #converted_text=convert2num(cleaned_text,lex)
185
  # cleaned_text=convert2num(cleaned_text,lex)
186
  # converted_to_list = convert_to_list(cleaned_text, text_to_list())
 
47
  data_dict[key] = value
48
  return data_dict
49
 
50
+ tellex=createlex("num_words_tel.txt")
51
+ kanlex=createlex("num_words_kn.txt")
52
  def addnum(inlist):
53
  sum=0
54
  for num in inlist:
 
76
  return None
77
  else:
78
  return None
79
+ def convert2numtel(input, lex):
80
  input += " #" # Add a period for termination
81
  words = input.split()
82
  i = 0
 
131
  # Final processing
132
  outstr = outstr.replace('#','') # Remove trailing spaces
133
  return outstr
134
+
135
+ def convert2numkn(input, lex): # Rewrite the transcript, replacing words/phrases found in lex with the values get_val returns; used with kanlex for the Kannada recognizer
136
+ input += " ######" # Append a "######" sentinel word (not a period); presumably absent from lex so a pending number run is flushed on the last iteration - TODO confirm
137
+ words = input.split()
138
+ i = 0
139
+ num = 0 # placeholder; reassigned from addnum(numlist) before it is read
140
+ outstr = ""
141
+ digit_end = True # NOTE(review): never set to False in this function, so the `if not digit_end` check below cannot fire
142
+ numlist = []
143
+ addflag = False
144
+
145
+ prevword = []
146
+
147
+ # Walk the words: single-word lexicon hits accumulate in numlist; on a miss, the last (up to 3) words are retried as one phrase
148
+ while i < len(words):
149
+
150
+ # Look up the current word on its own (None means no lexicon entry)
151
+ numval = get_val(words[i], lex)
152
+ if len(prevword)>=3:
153
+ prevword.pop(0)
154
+ prevword.append(words[i])
155
+ else:
156
+ prevword.append(words[i])
157
+ if numval is not None:
158
+
159
+ addflag = True
160
+
161
+ numlist.append(numval)
162
+
163
+ else:
164
+
165
+
166
+ #print("word--->"+words[i])
167
+ #print("addflagword--->"+str(addflag))
168
+ prevwords=" ".join(prevword)
169
+ #print("prev word--->"+prevwords)
170
+ numval=get_val(prevwords,lex)
171
+ if numval is not None:
172
+ #addflag=True
173
+ #print("numval " +numval)
174
+ numlist=[]
175
+ #print("First outstr--->"+outstr)
176
+
177
+
178
+ outwords = outstr.split()
179
+ outstr=" ".join(outwords[:-1]) # drop the last word already emitted before appending the phrase value; NOTE(review): only one word is removed even though the phrase window holds up to 3 - confirm intent
180
+ #print("outstr--->"+outstr)
181
+
182
+ outstr += " " + str(numval) + " "
183
+ #print(" aoutstr--->"+outstr)
184
+ numval=0
185
+ addflag=False
186
+
187
+ else:
188
+ if addflag:
189
+ num = addnum(numlist)
190
+ outstr += str(num) + " " + words[i] + " "
191
+ #print("penlast outstr--->"+outstr)
192
+ addflag = False
193
+ numlist = []
194
+ else:
195
+ outstr += words[i] + " "
196
+ #print("last outstr--->"+outstr)
197
+ if not digit_end:
198
+ digit_end = True
199
+
200
+
201
+
202
+ # Move to the next word
203
+ i += 1
204
+
205
+ # Final processing
206
+ outstr = outstr.replace('#','') # Strip the '#' sentinel characters; surrounding spaces are left in place
207
+ return outstr
208
+
209
  # Function to apply a high-pass filter
210
  def high_pass_filter(audio, sr, cutoff=300):
211
  nyquist = 0.5 * sr
 
238
  text_value = result['text']
239
  print (text_value)
240
  cleaned_text = text_value.replace("<s>", "")
241
+ converted_text=convert2numtel(cleaned_text,tellex)
242
  # cleaned_text=convert2num(cleaned_text,lex)
243
  # converted_to_list = convert_to_list(cleaned_text, text_to_list())
244
  # processed_doubles = process_doubles(converted_to_list)
 
256
  result = asr_model_kannada(denoised_audio)
257
  text_value = result['text']
258
  cleaned_text = text_value.replace("[UNK]", "")
259
+ converted_text=convert2numkn(cleaned_text,kanlex)
260
  #converted_text=convert2num(cleaned_text,lex)
261
  # cleaned_text=convert2num(cleaned_text,lex)
262
  # converted_to_list = convert_to_list(cleaned_text, text_to_list())