ginic committed on
Commit
44993c6
·
1 Parent(s): 557f37f

Initial attempt at adding textgrid format download

Browse files
Files changed (2) hide show
  1. app.py +67 -22
  2. requirements.txt +4 -2
app.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  import gradio as gr
2
 
3
  from transformers import pipeline
@@ -21,11 +23,10 @@ VALID_MODELS = [
21
  "ginic/gender_split_70_female_3_wav2vec2-large-xlsr-53-buckeye-ipa",
22
  "ginic/gender_split_70_female_4_wav2vec2-large-xlsr-53-buckeye-ipa",
23
  "ginic/gender_split_70_female_5_wav2vec2-large-xlsr-53-buckeye-ipa",
24
-
25
  ]
26
 
27
 
28
- def load_model_and_predict(model_name, audio_in, model_state):
29
  if model_state["model_name"] != model_name:
30
  model_state = {
31
  "loaded_model": pipeline(
@@ -34,7 +35,16 @@ def load_model_and_predict(model_name, audio_in, model_state):
34
  "model_name": model_name,
35
  }
36
 
37
- return model_state["loaded_model"](audio_in)["text"], model_state
 
 
 
 
 
 
 
 
 
38
 
39
 
40
  def launch_demo():
@@ -44,25 +54,60 @@ def launch_demo():
44
  ),
45
  "model_name": DEFAULT_MODEL,
46
  }
47
- demo = gr.Interface(
48
- fn=load_model_and_predict,
49
- inputs=[
50
- gr.Dropdown(
51
- VALID_MODELS,
52
- value=DEFAULT_MODEL,
53
- label="IPA transcription ASR model",
54
- info="Select the model to use for prediction.",
55
- ),
56
- gr.Audio(type="filepath"),
57
- gr.State(
58
- value=initial_model
59
- ), # Store the name of the currently loaded model
60
- ],
61
- outputs=[gr.Textbox(label="Predicted IPA transcription"), gr.State()],
62
- allow_flagging="never",
63
- title="Automatic International Phonetic Alphabet Transcription",
64
- description="This demo allows you to experiment with producing phonetic transcriptions of uploaded or recorded audio using a selected automatic speech recognition (ASR) model.",
65
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  demo.launch()
68
 
 
1
+ from pathlib import Path
2
+
3
  import gradio as gr
4
 
5
  from transformers import pipeline
 
23
  "ginic/gender_split_70_female_3_wav2vec2-large-xlsr-53-buckeye-ipa",
24
  "ginic/gender_split_70_female_4_wav2vec2-large-xlsr-53-buckeye-ipa",
25
  "ginic/gender_split_70_female_5_wav2vec2-large-xlsr-53-buckeye-ipa",
 
26
  ]
27
 
28
 
29
+ def load_model_and_predict(model_name: str, audio_in: str, model_state: dict):
30
  if model_state["model_name"] != model_name:
31
  model_state = {
32
  "loaded_model": pipeline(
 
35
  "model_name": model_name,
36
  }
37
 
38
+ return (
39
+ model_state["loaded_model"](audio_in)["text"],
40
+ model_state,
41
+ gr.DownloadButton("Download TextGrid file", visible=True),
42
+ )
43
+
44
+
45
+ def download_textgrid(audio_in, textgrid_tier_name, prediction):
46
+ # TODO: implement the TextGrid download; this stub currently does nothing and returns None.
47
+ pass
48
 
49
 
50
  def launch_demo():
 
54
  ),
55
  "model_name": DEFAULT_MODEL,
56
  }
57
+
58
+ with gr.Blocks() as demo:
59
+ gr.Markdown(
60
+ """# Automatic International Phonetic Alphabet Transcription
61
+ This demo allows you to experiment with producing phonetic transcriptions of uploaded or recorded audio using a selected automatic speech recognition (ASR) model.""",
62
+ )
63
+ model_name = gr.Dropdown(
64
+ VALID_MODELS,
65
+ value=DEFAULT_MODEL,
66
+ label="IPA transcription ASR model",
67
+ info="Select the model to use for prediction.",
68
+ )
69
+ audio_in = gr.Audio(type="filepath", show_download_button=True)
70
+ model_state = gr.State(value=initial_model)
71
+
72
+ prediction = gr.Textbox(label="Predicted IPA transcription")
73
+
74
+ textgrid_tier = gr.Textbox(
75
+ label="TextGrid Tier Name", value="transcription", interactive=True
76
+ )
77
+
78
+ download_btn = gr.DownloadButton("Download TextGrid file", visible=False)
79
+
80
+ # If user updates model name or audio, run prediction
81
+ audio_in.input(
82
+ fn=load_model_and_predict,
83
+ inputs=[model_name, audio_in, model_state],
84
+ outputs=[prediction, model_state, download_btn],
85
+ )
86
+ model_name.change(
87
+ fn=load_model_and_predict,
88
+ inputs=[model_name, audio_in, model_state],
89
+ outputs=[prediction, model_state, download_btn],
90
+ )
91
+
92
+ # demo = gr.Interface(
93
+ # fn=load_model_and_predict,
94
+ # inputs=[
95
+ # gr.Dropdown(
96
+ # VALID_MODELS,
97
+ # value=DEFAULT_MODEL,
98
+ # label="IPA transcription ASR model",
99
+ # info="Select the model to use for prediction.",
100
+ # ),
101
+ # gr.Audio(type="filepath", show_download_button=True),
102
+ # gr.State(
103
+ # value=initial_model
104
+ # ), # Store the name of the currently loaded model
105
+ # ],
106
+ # outputs=[gr.Textbox(label="Predicted IPA transcription"), gr.State()],
107
+ # allow_flagging="never",
108
+ # title="Automatic International Phonetic Alphabet Transcription",
109
+ # description="This demo allows you to experiment with producing phonetic transcriptions of uploaded or recorded audio using a selected automatic speech recognition (ASR) model.",
110
+ # )
111
 
112
  demo.launch()
113
 
requirements.txt CHANGED
@@ -1,2 +1,4 @@
1
- transformers[torch]
2
- ffmpeg
 
 
 
1
+ ffmpeg
2
+ librosa
3
+ tgt
4
+ transformers[torch]