Spaces:

ginic
/

multipa-english-to-ipa

Running

App Files Community

Added TextGrid Interval Support

by parthbhangla - opened Jun 20

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+87

-1

Files changed (1) hide show

app.py +87 -1

app.py CHANGED Viewed

@@ -1,10 +1,12 @@
 from pathlib import Path
 import tempfile
 import gradio as gr
 import librosa
 import tgt.core
 import tgt.io3
 from transformers import pipeline
 TEXTGRID_DIR = tempfile.mkdtemp()
@@ -105,6 +107,45 @@ def get_interactive_download_button(textgrid_contents, textgrid_filename):
     )
 def launch_demo():
     initial_model = {
         "loaded_model": pipeline(
@@ -125,9 +166,15 @@ def launch_demo():
             info="Select the model to use for prediction.",
         )
         audio_in = gr.Audio(type="filepath", show_download_button=True)
         model_state = gr.State(value=initial_model)
-        prediction = gr.Textbox(label="Predicted IPA transcription")
         gr.Markdown("""## TextGrid File Options
                     Change these inputs if you'd like to customize and download the transcription in [TextGrid format](https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html) for Praat.
@@ -152,6 +199,14 @@ def launch_demo():
             variant="primary",
         )
         # Update prediction if model or audio changes
         gr.on(
             triggers=[audio_in.input, model_name.change],
@@ -160,6 +215,13 @@ def launch_demo():
             outputs=[prediction, model_state, textgrid_filename],
         )
         # Download button becomes interactive if user updates audio or textgrid params
         gr.on(
             triggers=[textgrid_contents.change, textgrid_filename.change],
@@ -168,6 +230,30 @@ def launch_demo():
             outputs=[download_btn],
         )
     demo.launch(max_file_size="100mb")

 from pathlib import Path
 import tempfile
+import os
 import gradio as gr
 import librosa
 import tgt.core
 import tgt.io3
+import soundfile as sf
 from transformers import pipeline
 TEXTGRID_DIR = tempfile.mkdtemp()
     )
+def transcribe_intervals(audio_in, textgrid_path, source_tier, target_tier, model_state):
+    if audio_in is None or textgrid_path is None:
+        return "Missing audio or TextGrid input file."
+    tg=tgt.io.read_textgrid(textgrid_path.name)
+    tier = tg.get_tier_by_name(source_tier)
+    ipa_tier = tgt.core.IntervalTier(name=target_tier)
+    for interval in tier.intervals:
+        if not interval.text.strip(): # Skip empty text intervals
+            continue
+        start, end = interval.start_time, interval.end_time
+        try:
+            y, sr = librosa.load(audio_in, sr=None, offset=start, duration=end-start)
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
+                sf.write(temp_audio.name, y, sr)
+                prediction = model_state["loaded_model"](temp_audio.name)["text"]
+                ipa_tier.add_annotation(tgt.core.Interval(start, end, prediction))
+                os.remove(temp_audio.name)
+        except Exception as e:
+            ipa_tier.add_annotation(tgt.core.Interval(start, end, f"[Error]: {str(e)}"))
+    tg.add_tier(ipa_tier)
+    tgt_str = tgt.io3.export_to_long_textgrid(tg)
+    return tgt_str, tgt_str
+def extract_tier_names(textgrid_file):
+    try:
+        tg = tgt.io.read_textgrid(textgrid_file.name)
+        tier_names = [tier.name for tier in tg.tiers]
+        return gr.update(choices=tier_names, value=tier_names[0] if tier_names else None)
+    except Exception as e:
+        return gr.update(choices=[], value=None)
 def launch_demo():
     initial_model = {
         "loaded_model": pipeline(
             info="Select the model to use for prediction.",
         )
         audio_in = gr.Audio(type="filepath", show_download_button=True)
+        textgrid_file = gr.File(file_types=[".TextGrid"], label="Upload TextGrid File")
         model_state = gr.State(value=initial_model)
+        tier_names = gr.Dropdown(label="Source Tier (existing)", choices=[], interactive=True)
+        target_tier = gr.Textbox(label="Target Tier (new)", value="IPATier", placeholder="e.g. IPATier")
+        run_btn = gr.Button("Transcribe Intervals")
+        prediction = gr.Textbox(label="Full Audio IPA transcription")
         gr.Markdown("""## TextGrid File Options
                     Change these inputs if you'd like to customize and download the transcription in [TextGrid format](https://www.fon.hum.uva.nl/praat/manual/TextGrid_file_formats.html) for Praat.
             variant="primary",
         )
+        transcription_result = gr.Textbox(visible=False)
+        textgrid_preview = gr.Textbox(
+            label="Updated Interval Wise TextGrid Preview",
+            lines=20,
+            interactive=False,
+            show_copy_button=True
+        )
         # Update prediction if model or audio changes
         gr.on(
             triggers=[audio_in.input, model_name.change],
             outputs=[prediction, model_state, textgrid_filename],
         )
+        gr.on(
+            triggers=[audio_in.input, textgrid_tier.input, prediction.change],
+            fn=get_textgrid_contents,
+            inputs=[audio_in, textgrid_tier, prediction],
+            outputs=[textgrid_contents],
+        )
         # Download button becomes interactive if user updates audio or textgrid params
         gr.on(
             triggers=[textgrid_contents.change, textgrid_filename.change],
             outputs=[download_btn],
         )
+        textgrid_file.change(
+            fn=extract_tier_names,
+            inputs=[textgrid_file],
+            outputs=[tier_names],
+        )
+        run_btn.click(
+            fn=transcribe_intervals,
+            inputs=[audio_in, textgrid_file, tier_names, target_tier, model_state],
+            outputs=[transcription_result, textgrid_preview]
+        )
+        transcription_result.change(
+            fn=lambda tg_text: tg_text,
+            inputs=transcription_result,
+            outputs=textgrid_contents
+        )
+        transcription_result.change(
+            fn=lambda tg_text, filename: write_textgrid(tg_text, filename),
+            inputs=[transcription_result, textgrid_filename],
+            outputs=download_btn
+        )
     demo.launch(max_file_size="100mb")