github-actions[bot] committed
Commit 2fa9a5c · Parent: 3704634

Sync with https://github.com/mozilla-ai/speech-to-text-finetune

Files changed (1): app.py (+44 −18)
app.py CHANGED
@@ -1,7 +1,11 @@
 import os
+from pathlib import Path
+from typing import Tuple
 import gradio as gr
 import spaces
 from transformers import pipeline, Pipeline
+from huggingface_hub import repo_exists
+
 
 is_hf_space = os.getenv("IS_HF_SPACE")
 model_ids = [
@@ -16,7 +20,9 @@ model_ids = [
 ]
 
 
-def _load_local_model(model_dir: str) -> Pipeline:
+def _load_local_model(model_dir: str) -> Tuple[Pipeline | None, str]:
+    if not Path(model_dir).is_dir():
+        return None, f"⚠️ Couldn't find local model directory: {model_dir}"
     from transformers import (
         WhisperProcessor,
         WhisperTokenizer,
@@ -35,32 +41,43 @@ def _load_local_model(model_dir: str) -> Pipeline:
         processor=processor,
         tokenizer=tokenizer,
         feature_extractor=feature_extractor,
-    )
+    ), f"✅ Local model has been loaded from {model_dir}."
 
 
-def _load_hf_model(model_repo_id: str) -> Pipeline:
+def _load_hf_model(model_repo_id: str) -> Tuple[Pipeline | None, str]:
+    if not repo_exists(model_repo_id):
+        return (
+            None,
+            f"⚠️ Couldn't find {model_repo_id} on Hugging Face. If it's a private repo, make sure you are logged in locally.",
+        )
     return pipeline(
         "automatic-speech-recognition",
         model=model_repo_id,
-    )
+    ), f"✅ HF Model {model_repo_id} has been loaded."
 
 
-@spaces.GPU
-def transcribe(
-    dropdown_model_id: str,
-    hf_model_id: str,
-    local_model_id: str,
-    audio: gr.Audio,
-) -> str:
+def load_model(
+    dropdown_model_id: str, hf_model_id: str, local_model_id: str
+) -> Tuple[Pipeline, str]:
     if dropdown_model_id and not hf_model_id and not local_model_id:
         dropdown_model_id = dropdown_model_id.split(" (")[0]
-        pipe = _load_hf_model(dropdown_model_id)
+        yield None, f"Loading {dropdown_model_id}..."
+        yield _load_hf_model(dropdown_model_id)
     elif hf_model_id and not local_model_id and not dropdown_model_id:
-        pipe = _load_hf_model(hf_model_id)
+        yield None, f"Loading {hf_model_id}..."
+        yield _load_hf_model(hf_model_id)
    elif local_model_id and not hf_model_id and not dropdown_model_id:
-        pipe = _load_local_model(local_model_id)
+        yield None, f"Loading {local_model_id}..."
+        yield _load_local_model(local_model_id)
     else:
-        return ("️️⚠️ Please select or fill at least and only one of the options above",)
+        yield (
+            None,
+            "⚠️ Please select or fill exactly one of the options above",
+        )
+
+
+@spaces.GPU
+def transcribe(pipe: Pipeline, audio: gr.Audio) -> str:
     text = pipe(audio)["text"]
     return text
 
@@ -93,6 +110,9 @@ def setup_gradio_demo():
             placeholder="artifacts/my-whisper-tiny",
         )
 
+        load_model_button = gr.Button("Load model")
+        model_loaded = gr.Markdown()
+
         ### Transcription ###
         audio_input = gr.Audio(
             sources=["microphone", "upload"],
@@ -104,10 +124,16 @@ def setup_gradio_demo():
         transcribe_button = gr.Button("Transcribe")
         transcribe_output = gr.Text(label="Output")
 
+        ### Event listeners ###
+        model = gr.State()
+        load_model_button.click(
+            fn=load_model,
+            inputs=[dropdown_model, user_model, local_model],
+            outputs=[model, model_loaded],
+        )
+
         transcribe_button.click(
-            fn=transcribe,
-            inputs=[dropdown_model, user_model, local_model, audio_input],
-            outputs=transcribe_output,
+            fn=transcribe, inputs=[model, audio_input], outputs=transcribe_output
         )
 
     demo.launch()
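
For context, a minimal, self-contained sketch of the pattern this commit introduces: a generator event handler that first yields an interim status message into a gr.Markdown, then yields the loaded pipeline into gr.State, which a second handler reads. This is a sketch of the technique, not the Space's exact code; the model id "openai/whisper-tiny" and the widget labels are illustrative, and the @spaces.GPU decorator is omitted since it only applies inside a Space.

import gradio as gr
from transformers import pipeline, Pipeline


def load_model(model_id: str):
    # First yield: no pipeline yet, just an interim status message.
    yield None, f"Loading {model_id}..."
    # Second yield: the loaded pipeline plus a final status message.
    yield (
        pipeline("automatic-speech-recognition", model=model_id),
        f"✅ {model_id} has been loaded.",
    )


def transcribe(pipe: Pipeline, audio: str) -> str:
    if pipe is None:
        return "⚠️ Load a model first."
    return pipe(audio)["text"]


with gr.Blocks() as demo:
    # Illustrative default; any ASR checkpoint you have access to works.
    model_id = gr.Textbox(label="Model ID", value="openai/whisper-tiny")
    load_button = gr.Button("Load model")
    status = gr.Markdown()

    audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
    transcribe_button = gr.Button("Transcribe")
    output = gr.Text(label="Output")

    # gr.State carries the Pipeline object between the two click events,
    # so the model is loaded once rather than on every transcription.
    model = gr.State()
    load_button.click(fn=load_model, inputs=model_id, outputs=[model, status])
    transcribe_button.click(
        fn=transcribe, inputs=[model, audio_input], outputs=output
    )

demo.launch()

Threading the pipeline through gr.State is the main behavioral change in this diff: before it, transcribe() reloaded the model on every click.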