github-actions[bot] committed on
Commit
aa686dd
·
1 Parent(s): 06b5240

Sync with https://github.com/mozilla-ai/speech-to-text-finetune

Browse files
Files changed (1) hide show
  1. app.py +31 -21
app.py CHANGED
@@ -6,20 +6,21 @@ from transformers import pipeline, Pipeline
6
  from huggingface_hub import repo_exists
7
 
8
 
 
 
9
  is_hf_space = os.getenv("IS_HF_SPACE")
 
10
  model_ids = [
11
  "",
12
- "mozilla-ai/whisper-small-gl (Galician)",
13
- "mozilla-ai/whisper-small-el (Greek)",
14
- "openai/whisper-tiny (Multilingual)",
15
- "openai/whisper-small (Multilingual)",
16
- "openai/whisper-medium (Multilingual)",
17
- "openai/whisper-large-v3 (Multilingual)",
18
- "openai/whisper-large-v3-turbo (Multilingual)",
19
  ]
20
 
21
 
22
- def _load_local_model(model_dir: str) -> Tuple[Pipeline | None, str]:
23
  if not Path(model_dir).is_dir():
24
  return None, f"⚠️ Couldn't find local model directory: {model_dir}"
25
  from transformers import (
@@ -30,7 +31,9 @@ def _load_local_model(model_dir: str) -> Tuple[Pipeline | None, str]:
30
  )
31
 
32
  processor = WhisperProcessor.from_pretrained(model_dir)
33
- tokenizer = WhisperTokenizer.from_pretrained(model_dir, task="transcribe")
 
 
34
  feature_extractor = WhisperFeatureExtractor.from_pretrained(model_dir)
35
  model = WhisperForConditionalGeneration.from_pretrained(model_dir)
36
 
@@ -43,7 +46,7 @@ def _load_local_model(model_dir: str) -> Tuple[Pipeline | None, str]:
43
  ), f"✅ Local model has been loaded from {model_dir}."
44
 
45
 
46
- def _load_hf_model(model_repo_id: str) -> Tuple[Pipeline | None, str]:
47
  if not repo_exists(model_repo_id):
48
  return (
49
  None,
@@ -52,27 +55,29 @@ def _load_hf_model(model_repo_id: str) -> Tuple[Pipeline | None, str]:
52
  return pipeline(
53
  "automatic-speech-recognition",
54
  model=model_repo_id,
 
55
  ), f"✅ HF Model {model_repo_id} has been loaded."
56
 
57
 
58
  def load_model(
59
- dropdown_model_id: str, hf_model_id: str, local_model_id: str
60
  ) -> Tuple[Pipeline, str]:
61
  if dropdown_model_id and not hf_model_id and not local_model_id:
62
- dropdown_model_id = dropdown_model_id.split(" (")[0]
63
  yield None, f"Loading {dropdown_model_id}..."
64
- yield _load_hf_model(dropdown_model_id)
65
  elif hf_model_id and not local_model_id and not dropdown_model_id:
66
  yield None, f"Loading {hf_model_id}..."
67
- yield _load_hf_model(hf_model_id)
68
  elif local_model_id and not hf_model_id and not dropdown_model_id:
69
  yield None, f"Loading {local_model_id}..."
70
- yield _load_local_model(local_model_id)
71
  else:
72
  yield (
73
  None,
74
  "️️⚠️ Please select or fill at least and only one of the options above",
75
  )
 
 
76
 
77
 
78
  def transcribe(pipe: Pipeline, audio: gr.Audio) -> str:
@@ -84,13 +89,18 @@ def setup_gradio_demo():
84
  with gr.Blocks() as demo:
85
  gr.Markdown(
86
  """ # 🗣️ Speech-to-Text Transcription
87
- ### 1. Select which model to load from one of the options below.
88
- ### 2. Load the model by clicking the Load model button.
89
- ### 3. Record a message or upload an audio file.
90
- ### 4. Click Transcribe to see the transcription generated by the model.
 
91
  """
92
  )
93
- ### Model selection ###
 
 
 
 
94
 
95
  with gr.Row():
96
  with gr.Column():
@@ -126,7 +136,7 @@ def setup_gradio_demo():
126
  model = gr.State()
127
  load_model_button.click(
128
  fn=load_model,
129
- inputs=[dropdown_model, user_model, local_model],
130
  outputs=[model, model_loaded],
131
  )
132
 
 
6
  from huggingface_hub import repo_exists
7
 
8
 
9
+ from speech_to_text_finetune.config import LANGUAGES_NAME_TO_ID
10
+
11
  is_hf_space = os.getenv("IS_HF_SPACE")
12
+ languages = LANGUAGES_NAME_TO_ID.keys()
13
  model_ids = [
14
  "",
15
+ "openai/whisper-tiny",
16
+ "openai/whisper-small",
17
+ "openai/whisper-medium",
18
+ "openai/whisper-large-v3",
19
+ "openai/whisper-large-v3-turbo",
 
 
20
  ]
21
 
22
 
23
+ def _load_local_model(model_dir: str, language: str) -> Tuple[Pipeline | None, str]:
24
  if not Path(model_dir).is_dir():
25
  return None, f"⚠️ Couldn't find local model directory: {model_dir}"
26
  from transformers import (
 
31
  )
32
 
33
  processor = WhisperProcessor.from_pretrained(model_dir)
34
+ tokenizer = WhisperTokenizer.from_pretrained(
35
+ model_dir, language=language, task="transcribe"
36
+ )
37
  feature_extractor = WhisperFeatureExtractor.from_pretrained(model_dir)
38
  model = WhisperForConditionalGeneration.from_pretrained(model_dir)
39
 
 
46
  ), f"✅ Local model has been loaded from {model_dir}."
47
 
48
 
49
+ def _load_hf_model(model_repo_id: str, language: str) -> Tuple[Pipeline | None, str]:
50
  if not repo_exists(model_repo_id):
51
  return (
52
  None,
 
55
  return pipeline(
56
  "automatic-speech-recognition",
57
  model=model_repo_id,
58
+ generate_kwargs={"language": language},
59
  ), f"✅ HF Model {model_repo_id} has been loaded."
60
 
61
 
62
def load_model(
    language: str, dropdown_model_id: str, hf_model_id: str, local_model_id: str
) -> Tuple[Pipeline, str]:
    """Load exactly one model source, streaming status updates to the UI.

    Generator used as a Gradio event handler: each ``yield`` updates the
    ``(model_state, status_text)`` outputs, so intermediate yields show
    progress and the final yield carries the result.

    Args:
        language: Transcription language selected in the dropdown.
        dropdown_model_id: Preset HF repo id chosen from the dropdown.
        hf_model_id: Free-text Hugging Face repo id.
        local_model_id: Path to a local fine-tuned model directory.

    Yields:
        Tuples of (Pipeline | None, status message). On any validation
        failure the pipeline slot is None and the message explains why.
    """
    # Validate the language up front: the original ordering checked it only
    # after dispatching, so a missing language still triggered a full (and
    # potentially slow) model download with an empty language setting.
    if not language:
        yield None, "⚠️ Please select a language from the dropdown"
        return

    # Exactly one of the three model sources may be set.
    if dropdown_model_id and not hf_model_id and not local_model_id:
        yield None, f"Loading {dropdown_model_id}..."
        yield _load_hf_model(dropdown_model_id, language)
    elif hf_model_id and not local_model_id and not dropdown_model_id:
        yield None, f"Loading {hf_model_id}..."
        yield _load_hf_model(hf_model_id, language)
    elif local_model_id and not hf_model_id and not dropdown_model_id:
        yield None, f"Loading {local_model_id}..."
        yield _load_local_model(local_model_id, language)
    else:
        yield (
            None,
            "️️⚠️ Please select or fill at least and only one of the options above",
        )
81
 
82
 
83
  def transcribe(pipe: Pipeline, audio: gr.Audio) -> str:
 
89
  with gr.Blocks() as demo:
90
  gr.Markdown(
91
  """ # 🗣️ Speech-to-Text Transcription
92
+ ### 1. Select a language from the dropdown menu.
93
+ ### 2. Select which model to load from one of the options below.
94
+ ### 3. Load the model by clicking the Load model button.
95
+ ### 4. Record a message or upload an audio file.
96
+ ### 5. Click Transcribe to see the transcription generated by the model.
97
  """
98
  )
99
+ ### Language & Model selection ###
100
+
101
+ selected_lang = gr.Dropdown(
102
+ choices=list(languages), value=None, label="Select a language"
103
+ )
104
 
105
  with gr.Row():
106
  with gr.Column():
 
136
  model = gr.State()
137
  load_model_button.click(
138
  fn=load_model,
139
+ inputs=[selected_lang, dropdown_model, user_model, local_model],
140
  outputs=[model, model_loaded],
141
  )
142