github-actions[bot] committed on
Commit
054fda3
·
1 Parent(s): c0c05c8

Sync with https://github.com/mozilla-ai/speech-to-text-finetune

Browse files
Files changed (1) hide show
  1. app.py +31 -23
app.py CHANGED
@@ -2,25 +2,25 @@ import os
2
  from pathlib import Path
3
  from typing import Tuple
4
  import gradio as gr
5
- import spaces
6
  from transformers import pipeline, Pipeline
7
  from huggingface_hub import repo_exists
8
 
9
 
 
 
10
  is_hf_space = os.getenv("IS_HF_SPACE")
 
11
  model_ids = [
12
  "",
13
- "mozilla-ai/whisper-small-gl (Galician)",
14
- "mozilla-ai/whisper-small-el (Greek)",
15
- "openai/whisper-tiny (Multilingual)",
16
- "openai/whisper-small (Multilingual)",
17
- "openai/whisper-medium (Multilingual)",
18
- "openai/whisper-large-v3 (Multilingual)",
19
- "openai/whisper-large-v3-turbo (Multilingual)",
20
  ]
21
 
22
 
23
- def _load_local_model(model_dir: str) -> Tuple[Pipeline | None, str]:
24
  if not Path(model_dir).is_dir():
25
  return None, f"⚠️ Couldn't find local model directory: {model_dir}"
26
  from transformers import (
@@ -31,7 +31,9 @@ def _load_local_model(model_dir: str) -> Tuple[Pipeline | None, str]:
31
  )
32
 
33
  processor = WhisperProcessor.from_pretrained(model_dir)
34
- tokenizer = WhisperTokenizer.from_pretrained(model_dir, task="transcribe")
 
 
35
  feature_extractor = WhisperFeatureExtractor.from_pretrained(model_dir)
36
  model = WhisperForConditionalGeneration.from_pretrained(model_dir)
37
 
@@ -44,7 +46,7 @@ def _load_local_model(model_dir: str) -> Tuple[Pipeline | None, str]:
44
  ), f"✅ Local model has been loaded from {model_dir}."
45
 
46
 
47
- def _load_hf_model(model_repo_id: str) -> Tuple[Pipeline | None, str]:
48
  if not repo_exists(model_repo_id):
49
  return (
50
  None,
@@ -53,30 +55,31 @@ def _load_hf_model(model_repo_id: str) -> Tuple[Pipeline | None, str]:
53
  return pipeline(
54
  "automatic-speech-recognition",
55
  model=model_repo_id,
 
56
  ), f"✅ HF Model {model_repo_id} has been loaded."
57
 
58
 
59
  def load_model(
60
- dropdown_model_id: str, hf_model_id: str, local_model_id: str
61
  ) -> Tuple[Pipeline, str]:
62
  if dropdown_model_id and not hf_model_id and not local_model_id:
63
- dropdown_model_id = dropdown_model_id.split(" (")[0]
64
  yield None, f"Loading {dropdown_model_id}..."
65
- yield _load_hf_model(dropdown_model_id)
66
  elif hf_model_id and not local_model_id and not dropdown_model_id:
67
  yield None, f"Loading {hf_model_id}..."
68
- yield _load_hf_model(hf_model_id)
69
  elif local_model_id and not hf_model_id and not dropdown_model_id:
70
  yield None, f"Loading {local_model_id}..."
71
- yield _load_local_model(local_model_id)
72
  else:
73
  yield (
74
  None,
75
  "️️⚠️ Please select or fill at least and only one of the options above",
76
  )
 
 
77
 
78
 
79
- @spaces.GPU
80
  def transcribe(pipe: Pipeline, audio: gr.Audio) -> str:
81
  text = pipe(audio)["text"]
82
  return text
@@ -86,13 +89,18 @@ def setup_gradio_demo():
86
  with gr.Blocks() as demo:
87
  gr.Markdown(
88
  """ # 🗣️ Speech-to-Text Transcription
89
- ### 1. Select which model to load from one of the options below.
90
- ### 2. Load the model by clicking the Load model button.
91
- ### 3. Record a message or upload an audio file.
92
- ### 4. Click Transcribe to see the transcription generated by the model.
 
93
  """
94
  )
95
- ### Model selection ###
 
 
 
 
96
 
97
  with gr.Row():
98
  with gr.Column():
@@ -128,7 +136,7 @@ def setup_gradio_demo():
128
  model = gr.State()
129
  load_model_button.click(
130
  fn=load_model,
131
- inputs=[dropdown_model, user_model, local_model],
132
  outputs=[model, model_loaded],
133
  )
134
 
 
2
  from pathlib import Path
3
  from typing import Tuple
4
  import gradio as gr
 
5
  from transformers import pipeline, Pipeline
6
  from huggingface_hub import repo_exists
7
 
8
 
9
+ from speech_to_text_finetune.config import LANGUAGES_NAME_TO_ID
10
+
11
  is_hf_space = os.getenv("IS_HF_SPACE")
12
+ languages = LANGUAGES_NAME_TO_ID.keys()
13
  model_ids = [
14
  "",
15
+ "openai/whisper-tiny",
16
+ "openai/whisper-small",
17
+ "openai/whisper-medium",
18
+ "openai/whisper-large-v3",
19
+ "openai/whisper-large-v3-turbo",
 
 
20
  ]
21
 
22
 
23
+ def _load_local_model(model_dir: str, language: str) -> Tuple[Pipeline | None, str]:
24
  if not Path(model_dir).is_dir():
25
  return None, f"⚠️ Couldn't find local model directory: {model_dir}"
26
  from transformers import (
 
31
  )
32
 
33
  processor = WhisperProcessor.from_pretrained(model_dir)
34
+ tokenizer = WhisperTokenizer.from_pretrained(
35
+ model_dir, language=language, task="transcribe"
36
+ )
37
  feature_extractor = WhisperFeatureExtractor.from_pretrained(model_dir)
38
  model = WhisperForConditionalGeneration.from_pretrained(model_dir)
39
 
 
46
  ), f"✅ Local model has been loaded from {model_dir}."
47
 
48
 
49
+ def _load_hf_model(model_repo_id: str, language: str) -> Tuple[Pipeline | None, str]:
50
  if not repo_exists(model_repo_id):
51
  return (
52
  None,
 
55
  return pipeline(
56
  "automatic-speech-recognition",
57
  model=model_repo_id,
58
+ generate_kwargs={"language": language},
59
  ), f"✅ HF Model {model_repo_id} has been loaded."
60
 
61
 
62
  def load_model(
63
+ language: str, dropdown_model_id: str, hf_model_id: str, local_model_id: str
64
  ) -> Tuple[Pipeline, str]:
65
  if dropdown_model_id and not hf_model_id and not local_model_id:
 
66
  yield None, f"Loading {dropdown_model_id}..."
67
+ yield _load_hf_model(dropdown_model_id, language)
68
  elif hf_model_id and not local_model_id and not dropdown_model_id:
69
  yield None, f"Loading {hf_model_id}..."
70
+ yield _load_hf_model(hf_model_id, language)
71
  elif local_model_id and not hf_model_id and not dropdown_model_id:
72
  yield None, f"Loading {local_model_id}..."
73
+ yield _load_local_model(local_model_id, language)
74
  else:
75
  yield (
76
  None,
77
  "️️⚠️ Please select or fill at least and only one of the options above",
78
  )
79
+ if not language:
80
+ yield None, "⚠️ Please select a language from the dropdown"
81
 
82
 
 
83
  def transcribe(pipe: Pipeline, audio: gr.Audio) -> str:
84
  text = pipe(audio)["text"]
85
  return text
 
89
  with gr.Blocks() as demo:
90
  gr.Markdown(
91
  """ # 🗣️ Speech-to-Text Transcription
92
+ ### 1. Select a language from the dropdown menu.
93
+ ### 2. Select which model to load from one of the options below.
94
+ ### 3. Load the model by clicking the Load model button.
95
+ ### 4. Record a message or upload an audio file.
96
+ ### 5. Click Transcribe to see the transcription generated by the model.
97
  """
98
  )
99
+ ### Language & Model selection ###
100
+
101
+ selected_lang = gr.Dropdown(
102
+ choices=list(languages), value=None, label="Select a language"
103
+ )
104
 
105
  with gr.Row():
106
  with gr.Column():
 
136
  model = gr.State()
137
  load_model_button.click(
138
  fn=load_model,
139
+ inputs=[selected_lang, dropdown_model, user_model, local_model],
140
  outputs=[model, model_loaded],
141
  )
142