parthbhangla commited on
Commit
7546b72
·
verified ·
1 Parent(s): 716f8aa

Error Handling + Joe's Suggestion

Browse files

Error Handling For Model Loading
Error handling for audio duration vs. TextGrid length (warning if audio > TextGrid — only the annotated portion will be transcribed; error if TextGrid > audio)
Changed Dropdown Menu Option Name (Joe Emailed About This)

Files changed (1) hide show
  1. app.py +56 -31
app.py CHANGED
@@ -47,31 +47,32 @@ def load_model_and_predict(
47
  audio_in: str,
48
  model_state: dict,
49
  ):
50
- if audio_in is None:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  return (
52
- "",
53
  model_state,
54
- gr.Textbox(label=TEXTGRID_NAME_INPUT_LABEL, interactive=False),
55
- )
56
-
57
- if model_state["model_name"] != model_name:
58
- model_state = {
59
- "loaded_model": pipeline(
60
- task="automatic-speech-recognition", model=model_name
61
  ),
62
- "model_name": model_name,
63
- }
64
-
65
- prediction = model_state["loaded_model"](audio_in)["text"]
66
- return (
67
- prediction,
68
- model_state,
69
- gr.Textbox(
70
- label=TEXTGRID_NAME_INPUT_LABEL,
71
- interactive=True,
72
- value=Path(audio_in).with_suffix(".TextGrid").name,
73
- ),
74
- )
75
 
76
 
77
  def get_textgrid_contents(audio_in, textgrid_tier_name, transcription_prediction):
@@ -144,6 +145,34 @@ def extract_tier_names(textgrid_file):
144
  return gr.update(choices=tier_names, value=tier_names[0] if tier_names else None)
145
  except Exception as e:
146
  return gr.update(choices=[], value=None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
 
149
  def launch_demo():
@@ -154,10 +183,6 @@ def launch_demo():
154
  "model_name": DEFAULT_MODEL,
155
  }
156
 
157
- # Helper function - enables the interval transcribe button
158
- def enable_interval_transcribe_btn(audio, textgrid):
159
- return gr.update(interactive=(audio is not None and textgrid is not None))
160
-
161
  with gr.Blocks() as demo:
162
  gr.Markdown("""# Automatic International Phonetic Alphabet Transcription
163
  This demo allows you to experiment with producing phonetic transcriptions of uploaded or recorded audio using a selected automatic speech recognition (ASR) model.""")
@@ -172,7 +197,7 @@ def launch_demo():
172
 
173
  # Dropdown for transcription type selection
174
  transcription_type = gr.Dropdown(
175
- choices=["Full Audio", "Interval"],
176
  label="Transcription Type",
177
  value=None,
178
  interactive=True,
@@ -209,7 +234,7 @@ def launch_demo():
209
  transcription_type.change(
210
  fn=lambda t: (
211
  gr.update(visible=t == "Full Audio"),
212
- gr.update(visible=t == "Interval"),
213
  ),
214
  inputs=transcription_type,
215
  outputs=[full_audio_section, interval_section],
@@ -248,13 +273,13 @@ def launch_demo():
248
 
249
  # Enable interval transcribe button only when both files are uploaded
250
  interval_audio.change(
251
- fn=enable_interval_transcribe_btn,
252
  inputs=[interval_audio, interval_textgrid_file],
253
  outputs=[interval_transcribe_btn],
254
  )
255
 
256
  interval_textgrid_file.change(
257
- fn=enable_interval_transcribe_btn,
258
  inputs=[interval_audio, interval_textgrid_file],
259
  outputs=[interval_transcribe_btn],
260
  )
@@ -286,4 +311,4 @@ def launch_demo():
286
  demo.launch(max_file_size="100mb")
287
 
288
  if __name__ == "__main__":
289
- launch_demo()
 
47
  audio_in: str,
48
  model_state: dict,
49
  ):
50
+ try:
51
+ if audio_in is None:
52
+ return (
53
+ "",
54
+ model_state,
55
+ gr.Textbox(label=TEXTGRID_NAME_INPUT_LABEL, interactive=False),
56
+ )
57
+
58
+ if model_state["model_name"] != model_name:
59
+ model_state = {
60
+ "loaded_model": pipeline(task="automatic-speech-recognition", model=model_name),
61
+ "model_name": model_name,
62
+ }
63
+
64
+ prediction = model_state["loaded_model"](audio_in)["text"]
65
  return (
66
+ prediction,
67
  model_state,
68
+ gr.Textbox(
69
+ label=TEXTGRID_NAME_INPUT_LABEL,
70
+ interactive=True,
71
+ value=Path(audio_in).with_suffix(".TextGrid").name,
 
 
 
72
  ),
73
+ )
74
+ except Exception as e:
75
+ raise gr.Error(f"Failed to load model: {str(e)}")
 
 
 
 
 
 
 
 
 
 
76
 
77
 
78
  def get_textgrid_contents(audio_in, textgrid_tier_name, transcription_prediction):
 
145
  return gr.update(choices=tier_names, value=tier_names[0] if tier_names else None)
146
  except Exception as e:
147
  return gr.update(choices=[], value=None)
148
+
149
+
150
+ def validate_textgrid_for_intervals(audio_path, textgrid_file):
151
+ try:
152
+ if not audio_path or not textgrid_file:
153
+ return gr.update(interactive=False)
154
+
155
+ audio_duration = librosa.get_duration(path=audio_path)
156
+ tg = tgt.io.read_textgrid(textgrid_file.name)
157
+ tg_end_time = max(tier.end_time for tier in tg.tiers)
158
+
159
+ if tg_end_time > audio_duration:
160
+ raise gr.Error(
161
+ f"TextGrid ends at {tg_end_time:.2f}s but audio is only {audio_duration:.2f}s. "
162
+ "Please upload matching files."
163
+ )
164
+
165
+ epsilon = 0.01
166
+ if abs(tg_end_time - audio_duration) > epsilon:
167
+ gr.Warning(
168
+ f"TextGrid ends at {tg_end_time:.2f}s but audio is {audio_duration:.2f}s. "
169
+ "Only the annotated portion will be transcribed."
170
+ )
171
+
172
+ return gr.update(interactive=True)
173
+
174
+ except Exception as e:
175
+ raise gr.Error(f"Invalid TextGrid or audio file:\n{str(e)}")
176
 
177
 
178
  def launch_demo():
 
183
  "model_name": DEFAULT_MODEL,
184
  }
185
 
 
 
 
 
186
  with gr.Blocks() as demo:
187
  gr.Markdown("""# Automatic International Phonetic Alphabet Transcription
188
  This demo allows you to experiment with producing phonetic transcriptions of uploaded or recorded audio using a selected automatic speech recognition (ASR) model.""")
 
197
 
198
  # Dropdown for transcription type selection
199
  transcription_type = gr.Dropdown(
200
+ choices=["Full Audio", "TextGrid Interval"],
201
  label="Transcription Type",
202
  value=None,
203
  interactive=True,
 
234
  transcription_type.change(
235
  fn=lambda t: (
236
  gr.update(visible=t == "Full Audio"),
237
+ gr.update(visible=t == "TextGrid Interval"),
238
  ),
239
  inputs=transcription_type,
240
  outputs=[full_audio_section, interval_section],
 
273
 
274
  # Enable interval transcribe button only when both files are uploaded
275
  interval_audio.change(
276
+ fn=validate_textgrid_for_intervals,
277
  inputs=[interval_audio, interval_textgrid_file],
278
  outputs=[interval_transcribe_btn],
279
  )
280
 
281
  interval_textgrid_file.change(
282
+ fn=validate_textgrid_for_intervals,
283
  inputs=[interval_audio, interval_textgrid_file],
284
  outputs=[interval_transcribe_btn],
285
  )
 
311
  demo.launch(max_file_size="100mb")
312
 
313
  if __name__ == "__main__":
314
+ launch_demo()