Spaces:
Running
Running
Initial attempt at adding textgrid format download
Browse files- app.py +67 -22
- requirements.txt +4 -2
app.py
CHANGED
@@ -1,3 +1,5 @@
|
|
|
|
|
|
1 |
import gradio as gr
|
2 |
|
3 |
from transformers import pipeline
|
@@ -21,11 +23,10 @@ VALID_MODELS = [
|
|
21 |
"ginic/gender_split_70_female_3_wav2vec2-large-xlsr-53-buckeye-ipa",
|
22 |
"ginic/gender_split_70_female_4_wav2vec2-large-xlsr-53-buckeye-ipa",
|
23 |
"ginic/gender_split_70_female_5_wav2vec2-large-xlsr-53-buckeye-ipa",
|
24 |
-
|
25 |
]
|
26 |
|
27 |
|
28 |
-
def load_model_and_predict(model_name, audio_in, model_state):
|
29 |
if model_state["model_name"] != model_name:
|
30 |
model_state = {
|
31 |
"loaded_model": pipeline(
|
@@ -34,7 +35,16 @@ def load_model_and_predict(model_name, audio_in, model_state):
|
|
34 |
"model_name": model_name,
|
35 |
}
|
36 |
|
37 |
-
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
|
39 |
|
40 |
def launch_demo():
|
@@ -44,25 +54,60 @@ def launch_demo():
|
|
44 |
),
|
45 |
"model_name": DEFAULT_MODEL,
|
46 |
}
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
|
67 |
demo.launch()
|
68 |
|
|
|
1 |
+
from pathlib import Path
|
2 |
+
|
3 |
import gradio as gr
|
4 |
|
5 |
from transformers import pipeline
|
|
|
23 |
"ginic/gender_split_70_female_3_wav2vec2-large-xlsr-53-buckeye-ipa",
|
24 |
"ginic/gender_split_70_female_4_wav2vec2-large-xlsr-53-buckeye-ipa",
|
25 |
"ginic/gender_split_70_female_5_wav2vec2-large-xlsr-53-buckeye-ipa",
|
|
|
26 |
]
|
27 |
|
28 |
|
29 |
+
def load_model_and_predict(model_name: str, audio_in: str, model_state: dict):
|
30 |
if model_state["model_name"] != model_name:
|
31 |
model_state = {
|
32 |
"loaded_model": pipeline(
|
|
|
35 |
"model_name": model_name,
|
36 |
}
|
37 |
|
38 |
+
return (
|
39 |
+
model_state["loaded_model"](audio_in)["text"],
|
40 |
+
model_state,
|
41 |
+
gr.DownloadButton("Download TextGrid file", visible=True),
|
42 |
+
)
|
43 |
+
|
44 |
+
|
45 |
+
def download_textgrid(audio_in, textgrid_tier_name, prediction):
    """Placeholder for the TextGrid download handler; not implemented yet.

    All three arguments are accepted but currently ignored, and the
    function always returns ``None``.

    NOTE(review): judging by the parameter names, this is presumably meant
    to write ``prediction`` into a TextGrid tier named
    ``textgrid_tier_name`` for the audio at ``audio_in`` -- confirm the
    intended contract before implementing.
    """
    # TODO: implement TextGrid generation.
    return None
|
48 |
|
49 |
|
50 |
def launch_demo():
|
|
|
54 |
),
|
55 |
"model_name": DEFAULT_MODEL,
|
56 |
}
|
57 |
+
|
58 |
+
with gr.Blocks() as demo:
|
59 |
+
gr.Markdown(
|
60 |
+
"""# Automatic International Phonetic Alphabet Transcription
|
61 |
+
This demo allows you to experiment with producing phonetic transcriptions of uploaded or recorded audio using a selected automatic speech recognition (ASR) model.""",
|
62 |
+
)
|
63 |
+
model_name = gr.Dropdown(
|
64 |
+
VALID_MODELS,
|
65 |
+
value=DEFAULT_MODEL,
|
66 |
+
label="IPA transcription ASR model",
|
67 |
+
info="Select the model to use for prediction.",
|
68 |
+
)
|
69 |
+
audio_in = gr.Audio(type="filepath", show_download_button=True)
|
70 |
+
model_state = gr.State(value=initial_model)
|
71 |
+
|
72 |
+
prediction = gr.Textbox(label="Predicted IPA transcription")
|
73 |
+
|
74 |
+
textgrid_tier = gr.Textbox(
|
75 |
+
label="TextGrid Tier Name", value="transcription", interactive=True
|
76 |
+
)
|
77 |
+
|
78 |
+
download_btn = gr.DownloadButton("Download TextGrid file", visible=False)
|
79 |
+
|
80 |
+
# Re-run the prediction whenever the user provides new audio or selects a different model
|
81 |
+
audio_in.input(
|
82 |
+
fn=load_model_and_predict,
|
83 |
+
inputs=[model_name, audio_in, model_state],
|
84 |
+
outputs=[prediction, model_state, download_btn],
|
85 |
+
)
|
86 |
+
model_name.change(
|
87 |
+
fn=load_model_and_predict,
|
88 |
+
inputs=[model_name, audio_in, model_state],
|
89 |
+
outputs=[prediction, model_state, download_btn],
|
90 |
+
)
|
91 |
+
|
92 |
+
# demo = gr.Interface(
|
93 |
+
# fn=load_model_and_predict,
|
94 |
+
# inputs=[
|
95 |
+
# gr.Dropdown(
|
96 |
+
# VALID_MODELS,
|
97 |
+
# value=DEFAULT_MODEL,
|
98 |
+
# label="IPA transcription ASR model",
|
99 |
+
# info="Select the model to use for prediction.",
|
100 |
+
# ),
|
101 |
+
# gr.Audio(type="filepath", show_download_button=True),
|
102 |
+
# gr.State(
|
103 |
+
# value=initial_model
|
104 |
+
# ), # Store the name of the currently loaded model
|
105 |
+
# ],
|
106 |
+
# outputs=[gr.Textbox(label="Predicted IPA transcription"), gr.State()],
|
107 |
+
# allow_flagging="never",
|
108 |
+
# title="Automatic International Phonetic Alphabet Transcription",
|
109 |
+
# description="This demo allows you to experiment with producing phonetic transcriptions of uploaded or recorded audio using a selected automatic speech recognition (ASR) model.",
|
110 |
+
# )
|
111 |
|
112 |
demo.launch()
|
113 |
|
requirements.txt
CHANGED
@@ -1,2 +1,4 @@
|
|
1 |
-
|
2 |
-
|
|
|
|
|
|
1 |
+
ffmpeg
|
2 |
+
librosa
|
3 |
+
tgt
|
4 |
+
transformers[torch]
|