Added TextGrid Interval Support

#1
Files changed (1) hide show
  1. app.py +87 -1
app.py CHANGED
@@ -1,10 +1,12 @@
1
  from pathlib import Path
2
  import tempfile
 
3
 
4
  import gradio as gr
5
  import librosa
6
  import tgt.core
7
  import tgt.io3
 
8
  from transformers import pipeline
9
 
10
  TEXTGRID_DIR = tempfile.mkdtemp()
@@ -105,6 +107,45 @@ def get_interactive_download_button(textgrid_contents, textgrid_filename):
105
  )
106
 
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  def launch_demo():
109
  initial_model = {
110
  "loaded_model": pipeline(
@@ -125,9 +166,15 @@ def launch_demo():
125
  info="Select the model to use for prediction.",
126
  )
127
  audio_in = gr.Audio(type="filepath", show_download_button=True)
 
128
  model_state = gr.State(value=initial_model)
129
 
130
- prediction = gr.Textbox(label="Predicted IPA transcription")
 
 
 
 
 
131
 
132
  gr.Markdown("""## TextGrid File Options
133
  Change these inputs if you'd like to customize and download the transcription in [TextGrid format](https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html) for Praat.
@@ -152,6 +199,14 @@ def launch_demo():
152
  variant="primary",
153
  )
154
 
 
 
 
 
 
 
 
 
155
  # Update prediction if model or audio changes
156
  gr.on(
157
  triggers=[audio_in.input, model_name.change],
@@ -160,6 +215,13 @@ def launch_demo():
160
  outputs=[prediction, model_state, textgrid_filename],
161
  )
162
 
 
 
 
 
 
 
 
163
  # Download button becomes interactive if user updates audio or textgrid params
164
  gr.on(
165
  triggers=[textgrid_contents.change, textgrid_filename.change],
@@ -168,6 +230,30 @@ def launch_demo():
168
  outputs=[download_btn],
169
  )
170
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  demo.launch(max_file_size="100mb")
172
 
173
 
 
1
  from pathlib import Path
2
  import tempfile
3
+ import os
4
 
5
  import gradio as gr
6
  import librosa
7
  import tgt.core
8
  import tgt.io3
9
+ import soundfile as sf
10
  from transformers import pipeline
11
 
12
  TEXTGRID_DIR = tempfile.mkdtemp()
 
107
  )
108
 
109
 
110
def transcribe_intervals(audio_in, textgrid_path, source_tier, target_tier, model_state):
    """Transcribe every labelled interval of a TextGrid tier into a new tier.

    Each non-empty interval of ``source_tier`` is cut out of the audio file,
    written to a temporary WAV file and passed to the loaded model. The
    predicted text is stored in a new interval tier named ``target_tier``
    covering the same time span, and the tier is added to the TextGrid.

    Args:
        audio_in: Path of the audio file, or None if nothing was uploaded.
        textgrid_path: Uploaded TextGrid file (a path string or an object
            exposing the path as ``.name``), or None if nothing was uploaded.
        source_tier: Name of the existing tier whose intervals are transcribed.
        target_tier: Name given to the newly created tier.
        model_state: Mapping whose ``"loaded_model"`` entry is called with an
            audio file path and returns a mapping containing ``"text"``.

    Returns:
        A pair ``(result, preview)``. On success both entries are the TextGrid
        exported in long format. When an input is missing or the source tier
        cannot be found, ``result`` is a no-op update and ``preview`` is a
        message describing the problem.
    """
    if audio_in is None or textgrid_path is None:
        # The click handler has two outputs, so always hand back two values.
        # The first one is a no-op update so that an error message is never
        # propagated as if it were TextGrid contents.
        return gr.update(), "Missing audio or TextGrid input file."

    # Accept both a plain path string and a file-like object carrying `.name`.
    textgrid_location = getattr(textgrid_path, "name", textgrid_path)
    tg = tgt.io3.read_textgrid(textgrid_location)

    tier = next((t for t in tg.tiers if t.name == source_tier), None)
    if tier is None:
        return gr.update(), f"Source tier not found in TextGrid: {source_tier!r}"

    ipa_tier = tgt.core.IntervalTier(name=target_tier)

    for interval in tier.intervals:
        if not interval.text.strip():  # Skip empty text intervals
            continue

        start, end = interval.start_time, interval.end_time
        temp_path = None
        try:
            y, sr = librosa.load(audio_in, sr=None, offset=start, duration=end - start)
            # Only reserve a unique file name here; the handle is closed before
            # writing so the path can be reopened on every platform.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
                temp_path = temp_audio.name
            sf.write(temp_path, y, sr)
            label = model_state["loaded_model"](temp_path)["text"]
        except Exception as e:
            # Best effort: record the failure on this interval and keep going.
            label = f"[Error]: {str(e)}"
        finally:
            # Remove the clip even when loading, writing or inference failed.
            if temp_path is not None:
                try:
                    os.remove(temp_path)
                except OSError:
                    pass

        ipa_tier.add_annotation(tgt.core.Interval(start, end, label))

    tg.add_tier(ipa_tier)
    tgt_str = tgt.io3.export_to_long_textgrid(tg)

    return tgt_str, tgt_str
137
+
138
+
139
+
140
def extract_tier_names(textgrid_file):
    """Build a dropdown update listing the tier names of an uploaded TextGrid.

    Args:
        textgrid_file: Uploaded TextGrid file (a path string or an object
            exposing the path as ``.name``), or None when the upload is cleared.

    Returns:
        A ``gr.update`` whose choices are the tier names, with the first tier
        preselected. If there is no file, the file cannot be read, or it has no
        tiers, the choices are empty and the value is None.
    """
    if textgrid_file is None:
        # Upload was cleared: reset the dropdown without attempting a read.
        return gr.update(choices=[], value=None)

    try:
        # Accept both a plain path string and a file-like object with `.name`.
        textgrid_location = getattr(textgrid_file, "name", textgrid_file)
        tg = tgt.io3.read_textgrid(textgrid_location)
        tier_names = [tier.name for tier in tg.tiers]
    except Exception:
        # Best effort: an unreadable or malformed file just empties the dropdown.
        return gr.update(choices=[], value=None)

    return gr.update(choices=tier_names, value=tier_names[0] if tier_names else None)
147
+
148
+
149
  def launch_demo():
150
  initial_model = {
151
  "loaded_model": pipeline(
 
166
  info="Select the model to use for prediction.",
167
  )
168
  audio_in = gr.Audio(type="filepath", show_download_button=True)
169
+ textgrid_file = gr.File(file_types=[".TextGrid"], label="Upload TextGrid File")
170
  model_state = gr.State(value=initial_model)
171
 
172
+ tier_names = gr.Dropdown(label="Source Tier (existing)", choices=[], interactive=True)
173
+ target_tier = gr.Textbox(label="Target Tier (new)", value="IPATier", placeholder="e.g. IPATier")
174
+
175
+ run_btn = gr.Button("Transcribe Intervals")
176
+
177
+ prediction = gr.Textbox(label="Full Audio IPA transcription")
178
 
179
  gr.Markdown("""## TextGrid File Options
180
  Change these inputs if you'd like to customize and download the transcription in [TextGrid format](https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html) for Praat.
 
199
  variant="primary",
200
  )
201
 
202
+ transcription_result = gr.Textbox(visible=False)
203
+ textgrid_preview = gr.Textbox(
204
+ label="Updated Interval Wise TextGrid Preview",
205
+ lines=20,
206
+ interactive=False,
207
+ show_copy_button=True
208
+ )
209
+
210
  # Update prediction if model or audio changes
211
  gr.on(
212
  triggers=[audio_in.input, model_name.change],
 
215
  outputs=[prediction, model_state, textgrid_filename],
216
  )
217
 
218
+ gr.on(
219
+ triggers=[audio_in.input, textgrid_tier.input, prediction.change],
220
+ fn=get_textgrid_contents,
221
+ inputs=[audio_in, textgrid_tier, prediction],
222
+ outputs=[textgrid_contents],
223
+ )
224
+
225
  # Download button becomes interactive if user updates audio or textgrid params
226
  gr.on(
227
  triggers=[textgrid_contents.change, textgrid_filename.change],
 
230
  outputs=[download_btn],
231
  )
232
 
233
+ textgrid_file.change(
234
+ fn=extract_tier_names,
235
+ inputs=[textgrid_file],
236
+ outputs=[tier_names],
237
+ )
238
+
239
+ run_btn.click(
240
+ fn=transcribe_intervals,
241
+ inputs=[audio_in, textgrid_file, tier_names, target_tier, model_state],
242
+ outputs=[transcription_result, textgrid_preview]
243
+ )
244
+
245
+ transcription_result.change(
246
+ fn=lambda tg_text: tg_text,
247
+ inputs=transcription_result,
248
+ outputs=textgrid_contents
249
+ )
250
+
251
+ transcription_result.change(
252
+ fn=lambda tg_text, filename: write_textgrid(tg_text, filename),
253
+ inputs=[transcription_result, textgrid_filename],
254
+ outputs=download_btn
255
+ )
256
+
257
  demo.launch(max_file_size="100mb")
258
 
259