github-actions[bot] committed
Commit 29a8a9a · 1 Parent(s): 2fa9a5c

Sync with https://github.com/mozilla-ai/speech-to-text-finetune

Files changed (1)
  1. app.py +18 -44
app.py CHANGED
@@ -1,11 +1,7 @@
  import os
- from pathlib import Path
- from typing import Tuple
  import gradio as gr
  import spaces
  from transformers import pipeline, Pipeline
- from huggingface_hub import repo_exists
-
 
  is_hf_space = os.getenv("IS_HF_SPACE")
  model_ids = [
@@ -20,9 +16,7 @@ model_ids = [
  ]
 
 
- def _load_local_model(model_dir: str) -> Tuple[Pipeline | None, str]:
-     if not Path(model_dir).is_dir():
-         return None, f"⚠️ Couldn't find local model directory: {model_dir}"
+ def _load_local_model(model_dir: str) -> Pipeline:
      from transformers import (
          WhisperProcessor,
          WhisperTokenizer,
@@ -41,43 +35,32 @@ def _load_local_model(model_dir: str) -> Tuple[Pipeline | None, str]:
          processor=processor,
          tokenizer=tokenizer,
          feature_extractor=feature_extractor,
-     ), f"✅ Local model has been loaded from {model_dir}."
+     )
 
 
- def _load_hf_model(model_repo_id: str) -> Tuple[Pipeline | None, str]:
-     if not repo_exists(model_repo_id):
-         return (
-             None,
-             f"⚠️ Couldn't find {model_repo_id} on Hugging Face. If its a private repo, make sure you are logged in locally.",
-         )
+ def _load_hf_model(model_repo_id: str) -> Pipeline:
      return pipeline(
          "automatic-speech-recognition",
          model=model_repo_id,
-     ), f"✅ HF Model {model_repo_id} has been loaded."
+     )
 
 
- def load_model(
-     dropdown_model_id: str, hf_model_id: str, local_model_id: str
- ) -> Tuple[Pipeline, str]:
+ @spaces.GPU
+ def transcribe(
+     dropdown_model_id: str,
+     hf_model_id: str,
+     local_model_id: str,
+     audio: gr.Audio,
+ ) -> str:
      if dropdown_model_id and not hf_model_id and not local_model_id:
          dropdown_model_id = dropdown_model_id.split(" (")[0]
-         yield None, f"Loading {dropdown_model_id}..."
-         yield _load_hf_model(dropdown_model_id)
+         pipe = _load_hf_model(dropdown_model_id)
      elif hf_model_id and not local_model_id and not dropdown_model_id:
-         yield None, f"Loading {hf_model_id}..."
-         yield _load_hf_model(hf_model_id)
+         pipe = _load_hf_model(hf_model_id)
      elif local_model_id and not hf_model_id and not dropdown_model_id:
-         yield None, f"Loading {local_model_id}..."
-         yield _load_local_model(local_model_id)
+         pipe = _load_local_model(local_model_id)
      else:
-         yield (
-             None,
-             "️️⚠️ Please select or fill at least and only one of the options above",
-         )
-
-
- @spaces.GPU
- def transcribe(pipe: Pipeline, audio: gr.Audio) -> str:
+         return "️️⚠️ Please select or fill at least and only one of the options above"
      text = pipe(audio)["text"]
      return text
 
@@ -110,9 +93,6 @@ def setup_gradio_demo():
              placeholder="artifacts/my-whisper-tiny",
          )
 
-         load_model_button = gr.Button("Load model")
-         model_loaded = gr.Markdown()
-
          ### Transcription ###
          audio_input = gr.Audio(
              sources=["microphone", "upload"],
@@ -124,16 +104,10 @@ def setup_gradio_demo():
          transcribe_button = gr.Button("Transcribe")
          transcribe_output = gr.Text(label="Output")
 
-         ### Event listeners ###
-         model = gr.State()
-         load_model_button.click(
-             fn=load_model,
-             inputs=[dropdown_model, user_model, local_model],
-             outputs=[model, model_loaded],
-         )
-
          transcribe_button.click(
-             fn=transcribe, inputs=[model, audio_input], outputs=transcribe_output
+             fn=transcribe,
+             inputs=[dropdown_model, user_model, local_model, audio_input],
+             outputs=transcribe_output,
          )
 
      demo.launch()
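
The change folds model loading into the @spaces.GPU-decorated transcribe call, so the separate "Load model" button, the status markdown, and the gr.State handle are no longer needed. A minimal sketch of the resulting resolve-then-run flow outside Gradio, assuming a stock Whisper checkpoint and a local audio file (the model id and file name are illustrative placeholders, not part of the commit):

# Sketch of the logic transcribe now performs on every click.
from transformers import pipeline


def transcribe_once(model_repo_id: str, audio_path: str) -> str:
    # Equivalent of _load_hf_model: build the ASR pipeline on demand...
    pipe = pipeline("automatic-speech-recognition", model=model_repo_id)
    # ...then run it and pull the text field, as the new transcribe body does.
    return pipe(audio_path)["text"]


print(transcribe_once("openai/whisper-tiny", "sample.wav"))

The trade-off: with gr.State gone, the pipeline is rebuilt on every Transcribe click rather than cached between calls. This is the usual pattern for ZeroGPU Spaces, where GPU work is scoped to the @spaces.GPU-decorated function.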