Spaces:
Running
Running
Added TextGrid Interval Support
#1
by
parthbhangla
- opened
app.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1 |
from pathlib import Path
|
2 |
import tempfile
|
|
|
3 |
|
4 |
import gradio as gr
|
5 |
import librosa
|
6 |
import tgt.core
|
7 |
import tgt.io3
|
|
|
8 |
from transformers import pipeline
|
9 |
|
10 |
TEXTGRID_DIR = tempfile.mkdtemp()
|
@@ -105,6 +107,45 @@ def get_interactive_download_button(textgrid_contents, textgrid_filename):
|
|
105 |
)
|
106 |
|
107 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
def launch_demo():
|
109 |
initial_model = {
|
110 |
"loaded_model": pipeline(
|
@@ -125,9 +166,15 @@ def launch_demo():
|
|
125 |
info="Select the model to use for prediction.",
|
126 |
)
|
127 |
audio_in = gr.Audio(type="filepath", show_download_button=True)
|
|
|
128 |
model_state = gr.State(value=initial_model)
|
129 |
|
130 |
-
|
|
|
|
|
|
|
|
|
|
|
131 |
|
132 |
gr.Markdown("""## TextGrid File Options
|
133 |
Change these inputs if you'd like to customize and download the transcription in [TextGrid format](https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html) for Praat.
|
@@ -152,6 +199,14 @@ def launch_demo():
|
|
152 |
variant="primary",
|
153 |
)
|
154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
# Update prediction if model or audio changes
|
156 |
gr.on(
|
157 |
triggers=[audio_in.input, model_name.change],
|
@@ -160,6 +215,13 @@ def launch_demo():
|
|
160 |
outputs=[prediction, model_state, textgrid_filename],
|
161 |
)
|
162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
# Download button becomes interactive if user updates audio or textgrid params
|
164 |
gr.on(
|
165 |
triggers=[textgrid_contents.change, textgrid_filename.change],
|
@@ -168,6 +230,30 @@ def launch_demo():
|
|
168 |
outputs=[download_btn],
|
169 |
)
|
170 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
demo.launch(max_file_size="100mb")
|
172 |
|
173 |
|
|
|
1 |
from pathlib import Path
|
2 |
import tempfile
|
3 |
+
import os
|
4 |
|
5 |
import gradio as gr
|
6 |
import librosa
|
7 |
import tgt.core
|
8 |
import tgt.io3
|
9 |
+
import soundfile as sf
|
10 |
from transformers import pipeline
|
11 |
|
12 |
TEXTGRID_DIR = tempfile.mkdtemp()
|
|
|
107 |
)
|
108 |
|
109 |
|
110 |
+
def transcribe_intervals(audio_in, textgrid_path, source_tier, target_tier, model_state):
    """Transcribe every non-empty interval of a TextGrid tier into a new tier.

    For each interval in ``source_tier`` whose text is not blank, the matching
    span of ``audio_in`` is loaded, written to a temporary WAV file and passed
    to ``model_state["loaded_model"]``. The ``"text"`` entry of the model
    output is stored in a new interval tier named ``target_tier`` that is then
    added to the TextGrid.

    Args:
        audio_in: Path of the audio file, or None when no audio is provided.
        textgrid_path: The uploaded TextGrid, either a path string or an
            object exposing the path as ``.name``; None when nothing is
            uploaded.
        source_tier: Name of the existing tier whose intervals are transcribed.
        target_tier: Name given to the newly created tier.
        model_state: Mapping whose ``"loaded_model"`` entry is called with the
            clip path and returns a mapping containing ``"text"``.

    Returns:
        A 2-tuple. On success both items are the TextGrid exported in long
        format. When audio or TextGrid is missing, the first item is an empty
        string and the second is a message describing the problem.
    """
    if audio_in is None or textgrid_path is None:
        # Always return two values: this function is wired to two outputs.
        # The message goes only in the second slot so it is never mistaken
        # for TextGrid contents.
        return "", "Missing audio or TextGrid input file."

    # Accept both a plain path string and an upload object carrying `.name`.
    textgrid_filename = getattr(textgrid_path, "name", textgrid_path)
    tg = tgt.io3.read_textgrid(textgrid_filename)
    tier = tg.get_tier_by_name(source_tier)
    ipa_tier = tgt.core.IntervalTier(name=target_tier)

    for interval in tier.intervals:
        if not interval.text.strip():  # Skip empty text intervals
            continue

        start, end = interval.start_time, interval.end_time
        clip_path = None
        try:
            y, sr = librosa.load(audio_in, sr=None, offset=start, duration=end - start)
            # Reserve a temp file name, then close the handle before writing
            # so the file is not held open while it is written and read.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
                clip_path = temp_audio.name
            sf.write(clip_path, y, sr)
            prediction = model_state["loaded_model"](clip_path)["text"]
        except Exception as e:
            # Deliberate best-effort: one failing interval is recorded in the
            # tier instead of aborting the whole transcription.
            prediction = f"[Error]: {str(e)}"
        finally:
            # Remove the clip even when writing or the model call failed.
            if clip_path is not None and os.path.exists(clip_path):
                os.remove(clip_path)

        ipa_tier.add_annotation(tgt.core.Interval(start, end, prediction))

    tg.add_tier(ipa_tier)
    tgt_str = tgt.io3.export_to_long_textgrid(tg)

    return tgt_str, tgt_str
|
137 |
+
|
138 |
+
|
139 |
+
|
140 |
+
def extract_tier_names(textgrid_file):
    """Build a Gradio update listing the tier names of an uploaded TextGrid.

    Args:
        textgrid_file: The uploaded TextGrid, either a path string or an
            object exposing the path as ``.name``; None when no file is set.

    Returns:
        A ``gr.update`` whose choices are the tier names, with the first name
        selected. When no file is given, the file cannot be read, or it has no
        tiers, the value is None; choices are empty in the first two cases.
    """
    if textgrid_file is None:
        # No file set: reset the choices explicitly instead of relying on an
        # AttributeError being swallowed below.
        return gr.update(choices=[], value=None)

    try:
        # Accept both a plain path string and an upload object with `.name`.
        tg = tgt.io3.read_textgrid(getattr(textgrid_file, "name", textgrid_file))
    except Exception:
        # Best-effort: an unreadable or malformed file just clears the choices.
        return gr.update(choices=[], value=None)

    tier_names = [tier.name for tier in tg.tiers]
    return gr.update(choices=tier_names, value=tier_names[0] if tier_names else None)
|
147 |
+
|
148 |
+
|
149 |
def launch_demo():
|
150 |
initial_model = {
|
151 |
"loaded_model": pipeline(
|
|
|
166 |
info="Select the model to use for prediction.",
|
167 |
)
|
168 |
audio_in = gr.Audio(type="filepath", show_download_button=True)
|
169 |
+
textgrid_file = gr.File(file_types=[".TextGrid"], label="Upload TextGrid File")
|
170 |
model_state = gr.State(value=initial_model)
|
171 |
|
172 |
+
tier_names = gr.Dropdown(label="Source Tier (existing)", choices=[], interactive=True)
|
173 |
+
target_tier = gr.Textbox(label="Target Tier (new)", value="IPATier", placeholder="e.g. IPATier")
|
174 |
+
|
175 |
+
run_btn = gr.Button("Transcribe Intervals")
|
176 |
+
|
177 |
+
prediction = gr.Textbox(label="Full Audio IPA transcription")
|
178 |
|
179 |
gr.Markdown("""## TextGrid File Options
|
180 |
Change these inputs if you'd like to customize and download the transcription in [TextGrid format](https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html) for Praat.
|
|
|
199 |
variant="primary",
|
200 |
)
|
201 |
|
202 |
+
transcription_result = gr.Textbox(visible=False)
|
203 |
+
textgrid_preview = gr.Textbox(
|
204 |
+
label="Updated Interval Wise TextGrid Preview",
|
205 |
+
lines=20,
|
206 |
+
interactive=False,
|
207 |
+
show_copy_button=True
|
208 |
+
)
|
209 |
+
|
210 |
# Update prediction if model or audio changes
|
211 |
gr.on(
|
212 |
triggers=[audio_in.input, model_name.change],
|
|
|
215 |
outputs=[prediction, model_state, textgrid_filename],
|
216 |
)
|
217 |
|
218 |
+
gr.on(
|
219 |
+
triggers=[audio_in.input, textgrid_tier.input, prediction.change],
|
220 |
+
fn=get_textgrid_contents,
|
221 |
+
inputs=[audio_in, textgrid_tier, prediction],
|
222 |
+
outputs=[textgrid_contents],
|
223 |
+
)
|
224 |
+
|
225 |
# Download button becomes interactive if user updates audio or textgrid params
|
226 |
gr.on(
|
227 |
triggers=[textgrid_contents.change, textgrid_filename.change],
|
|
|
230 |
outputs=[download_btn],
|
231 |
)
|
232 |
|
233 |
+
textgrid_file.change(
|
234 |
+
fn=extract_tier_names,
|
235 |
+
inputs=[textgrid_file],
|
236 |
+
outputs=[tier_names],
|
237 |
+
)
|
238 |
+
|
239 |
+
run_btn.click(
|
240 |
+
fn=transcribe_intervals,
|
241 |
+
inputs=[audio_in, textgrid_file, tier_names, target_tier, model_state],
|
242 |
+
outputs=[transcription_result, textgrid_preview]
|
243 |
+
)
|
244 |
+
|
245 |
+
transcription_result.change(
|
246 |
+
fn=lambda tg_text: tg_text,
|
247 |
+
inputs=transcription_result,
|
248 |
+
outputs=textgrid_contents
|
249 |
+
)
|
250 |
+
|
251 |
+
transcription_result.change(
|
252 |
+
fn=lambda tg_text, filename: write_textgrid(tg_text, filename),
|
253 |
+
inputs=[transcription_result, textgrid_filename],
|
254 |
+
outputs=download_btn
|
255 |
+
)
|
256 |
+
|
257 |
demo.launch(max_file_size="100mb")
|
258 |
|
259 |
|