Spaces:

ginic
/

multipa-english-to-ipa

Running

App Files Community

ginic committed on Jan 6

Commit

44993c6

1 Parent(s): 557f37f

Initial attempt at adding textgrid format download

Browse files

Files changed (2) hide show

app.py +67 -22
requirements.txt +4 -2

app.py CHANGED Viewed

@@ -1,3 +1,5 @@
 import gradio as gr
 from transformers import pipeline
@@ -21,11 +23,10 @@ VALID_MODELS = [
     "ginic/gender_split_70_female_3_wav2vec2-large-xlsr-53-buckeye-ipa",
     "ginic/gender_split_70_female_4_wav2vec2-large-xlsr-53-buckeye-ipa",
     "ginic/gender_split_70_female_5_wav2vec2-large-xlsr-53-buckeye-ipa",
 ]
-def load_model_and_predict(model_name, audio_in, model_state):
     if model_state["model_name"] != model_name:
         model_state = {
             "loaded_model": pipeline(
@@ -34,7 +35,16 @@ def load_model_and_predict(model_name, audio_in, model_state):
             "model_name": model_name,
         }
-    return model_state["loaded_model"](audio_in)["text"], model_state
 def launch_demo():
@@ -44,25 +54,60 @@ def launch_demo():
         ),
         "model_name": DEFAULT_MODEL,
     }
-    demo = gr.Interface(
-        fn=load_model_and_predict,
-        inputs=[
-            gr.Dropdown(
-                VALID_MODELS,
-                value=DEFAULT_MODEL,
-                label="IPA transcription ASR model",
-                info="Select the model to use for prediction.",
-            ),
-            gr.Audio(type="filepath"),
-            gr.State(
-                value=initial_model
-            ),  # Store the name of the currently loaded model
-        ],
-        outputs=[gr.Textbox(label="Predicted IPA transcription"), gr.State()],
-        allow_flagging="never",
-        title="Automatic International Phonetic Alphabet Transcription",
-        description="This demo allows you to experiment with producing phonetic transcriptions of uploaded or recorded audio using a selected automatic speech recognition (ASR) model.",
-    )
     demo.launch()

+from pathlib import Path
 import gradio as gr
 from transformers import pipeline
     "ginic/gender_split_70_female_3_wav2vec2-large-xlsr-53-buckeye-ipa",
     "ginic/gender_split_70_female_4_wav2vec2-large-xlsr-53-buckeye-ipa",
     "ginic/gender_split_70_female_5_wav2vec2-large-xlsr-53-buckeye-ipa",
 ]
+def load_model_and_predict(model_name: str, audio_in: str, model_state: dict):
     if model_state["model_name"] != model_name:
         model_state = {
             "loaded_model": pipeline(
             "model_name": model_name,
         }
+    return (
+        model_state["loaded_model"](audio_in)["text"],
+        model_state,
+        gr.DownloadButton("Download TextGrid file", visible=True),
+    )
+def download_textgrid(audio_in, textgrid_tier_name, prediction):
+    # TODO
+    pass
 def launch_demo():
         ),
         "model_name": DEFAULT_MODEL,
     }
+    with gr.Blocks() as demo:
+        gr.Markdown(
+            """# Automatic International Phonetic Alphabet Transcription
+            This demo allows you to experiment with producing phonetic transcriptions of uploaded or recorded audio using a selected automatic speech recognition (ASR) model.""",
+        )
+        model_name = gr.Dropdown(
+            VALID_MODELS,
+            value=DEFAULT_MODEL,
+            label="IPA transcription ASR model",
+            info="Select the model to use for prediction.",
+        )
+        audio_in = gr.Audio(type="filepath", show_download_button=True)
+        model_state = gr.State(value=initial_model)
+        prediction = gr.Textbox(label="Predicted IPA transcription")
+        textgrid_tier = gr.Textbox(
+            label="TextGrid Tier Name", value="transcription", interactive=True
+        )
+        download_btn = gr.DownloadButton("Download TextGrid file", visible=False)
+        # If user updates model name or audio, run prediction
+        audio_in.input(
+            fn=load_model_and_predict,
+            inputs=[model_name, audio_in, model_state],
+            outputs=[prediction, model_state, download_btn],
+        )
+        model_name.change(
+            fn=load_model_and_predict,
+            inputs=[model_name, audio_in, model_state],
+            outputs=[prediction, model_state, download_btn],
+        )
+    # demo = gr.Interface(
+    #     fn=load_model_and_predict,
+    #     inputs=[
+    #         gr.Dropdown(
+    #             VALID_MODELS,
+    #             value=DEFAULT_MODEL,
+    #             label="IPA transcription ASR model",
+    #             info="Select the model to use for prediction.",
+    #         ),
+    #         gr.Audio(type="filepath", show_download_button=True),
+    #         gr.State(
+    #             value=initial_model
+    #         ),  # Store the name of the currently loaded model
+    #     ],
+    #     outputs=[gr.Textbox(label="Predicted IPA transcription"), gr.State()],
+    #     allow_flagging="never",
+    #     title="Automatic International Phonetic Alphabet Transcription",
+    #     description="This demo allows you to experiment with producing phonetic transcriptions of uploaded or recorded audio using a selected automatic speech recognition (ASR) model.",
+    # )
     demo.launch()

requirements.txt CHANGED Viewed

@@ -1,2 +1,4 @@
-transformers[torch]
-ffmpeg

+ffmpeg
+librosa
+tgt
+transformers[torch]