github-actions[bot] committed on
Commit
c0c05c8
·
1 Parent(s): cc36035

Sync with https://github.com/mozilla-ai/speech-to-text-finetune

Browse files
Files changed (1) hide show
  1. app.py +23 -31
app.py CHANGED
@@ -2,25 +2,25 @@ import os
2
  from pathlib import Path
3
  from typing import Tuple
4
  import gradio as gr
 
5
  from transformers import pipeline, Pipeline
6
  from huggingface_hub import repo_exists
7
 
8
 
9
- from speech_to_text_finetune.config import LANGUAGES_NAME_TO_ID
10
-
11
  is_hf_space = os.getenv("IS_HF_SPACE")
12
- languages = LANGUAGES_NAME_TO_ID.keys()
13
  model_ids = [
14
  "",
15
- "openai/whisper-tiny",
16
- "openai/whisper-small",
17
- "openai/whisper-medium",
18
- "openai/whisper-large-v3",
19
- "openai/whisper-large-v3-turbo",
 
 
20
  ]
21
 
22
 
23
- def _load_local_model(model_dir: str, language: str) -> Tuple[Pipeline | None, str]:
24
  if not Path(model_dir).is_dir():
25
  return None, f"⚠️ Couldn't find local model directory: {model_dir}"
26
  from transformers import (
@@ -31,9 +31,7 @@ def _load_local_model(model_dir: str, language: str) -> Tuple[Pipeline | None, s
31
  )
32
 
33
  processor = WhisperProcessor.from_pretrained(model_dir)
34
- tokenizer = WhisperTokenizer.from_pretrained(
35
- model_dir, language=language, task="transcribe"
36
- )
37
  feature_extractor = WhisperFeatureExtractor.from_pretrained(model_dir)
38
  model = WhisperForConditionalGeneration.from_pretrained(model_dir)
39
 
@@ -46,7 +44,7 @@ def _load_local_model(model_dir: str, language: str) -> Tuple[Pipeline | None, s
46
  ), f"✅ Local model has been loaded from {model_dir}."
47
 
48
 
49
- def _load_hf_model(model_repo_id: str, language: str) -> Tuple[Pipeline | None, str]:
50
  if not repo_exists(model_repo_id):
51
  return (
52
  None,
@@ -55,31 +53,30 @@ def _load_hf_model(model_repo_id: str, language: str) -> Tuple[Pipeline | None,
55
  return pipeline(
56
  "automatic-speech-recognition",
57
  model=model_repo_id,
58
- generate_kwargs={"language": language},
59
  ), f"✅ HF Model {model_repo_id} has been loaded."
60
 
61
 
62
  def load_model(
63
- language: str, dropdown_model_id: str, hf_model_id: str, local_model_id: str
64
  ) -> Tuple[Pipeline, str]:
65
  if dropdown_model_id and not hf_model_id and not local_model_id:
 
66
  yield None, f"Loading {dropdown_model_id}..."
67
- yield _load_hf_model(dropdown_model_id, language)
68
  elif hf_model_id and not local_model_id and not dropdown_model_id:
69
  yield None, f"Loading {hf_model_id}..."
70
- yield _load_hf_model(hf_model_id, language)
71
  elif local_model_id and not hf_model_id and not dropdown_model_id:
72
  yield None, f"Loading {local_model_id}..."
73
- yield _load_local_model(local_model_id, language)
74
  else:
75
  yield (
76
  None,
77
  "️️⚠️ Please select or fill at least and only one of the options above",
78
  )
79
- if not language:
80
- yield None, "⚠️ Please select a language from the dropdown"
81
 
82
 
 
83
  def transcribe(pipe: Pipeline, audio: gr.Audio) -> str:
84
  text = pipe(audio)["text"]
85
  return text
@@ -89,18 +86,13 @@ def setup_gradio_demo():
89
  with gr.Blocks() as demo:
90
  gr.Markdown(
91
  """ # 🗣️ Speech-to-Text Transcription
92
- ### 1. Select a language from the dropdown menu.
93
- ### 2. Select which model to load from one of the options below.
94
- ### 3. Load the model by clicking the Load model button.
95
- ### 4. Record a message or upload an audio file.
96
- ### 5. Click Transcribe to see the transcription generated by the model.
97
  """
98
  )
99
- ### Language & Model selection ###
100
-
101
- selected_lang = gr.Dropdown(
102
- choices=list(languages), value=None, label="Select a language"
103
- )
104
 
105
  with gr.Row():
106
  with gr.Column():
@@ -136,7 +128,7 @@ def setup_gradio_demo():
136
  model = gr.State()
137
  load_model_button.click(
138
  fn=load_model,
139
- inputs=[selected_lang, dropdown_model, user_model, local_model],
140
  outputs=[model, model_loaded],
141
  )
142
 
 
2
  from pathlib import Path
3
  from typing import Tuple
4
  import gradio as gr
5
+ import spaces
6
  from transformers import pipeline, Pipeline
7
  from huggingface_hub import repo_exists
8
 
9
 
 
 
10
  is_hf_space = os.getenv("IS_HF_SPACE")
 
11
  model_ids = [
12
  "",
13
+ "mozilla-ai/whisper-small-gl (Galician)",
14
+ "mozilla-ai/whisper-small-el (Greek)",
15
+ "openai/whisper-tiny (Multilingual)",
16
+ "openai/whisper-small (Multilingual)",
17
+ "openai/whisper-medium (Multilingual)",
18
+ "openai/whisper-large-v3 (Multilingual)",
19
+ "openai/whisper-large-v3-turbo (Multilingual)",
20
  ]
21
 
22
 
23
+ def _load_local_model(model_dir: str) -> Tuple[Pipeline | None, str]:
24
  if not Path(model_dir).is_dir():
25
  return None, f"⚠️ Couldn't find local model directory: {model_dir}"
26
  from transformers import (
 
31
  )
32
 
33
  processor = WhisperProcessor.from_pretrained(model_dir)
34
+ tokenizer = WhisperTokenizer.from_pretrained(model_dir, task="transcribe")
 
 
35
  feature_extractor = WhisperFeatureExtractor.from_pretrained(model_dir)
36
  model = WhisperForConditionalGeneration.from_pretrained(model_dir)
37
 
 
44
  ), f"✅ Local model has been loaded from {model_dir}."
45
 
46
 
47
+ def _load_hf_model(model_repo_id: str) -> Tuple[Pipeline | None, str]:
48
  if not repo_exists(model_repo_id):
49
  return (
50
  None,
 
53
  return pipeline(
54
  "automatic-speech-recognition",
55
  model=model_repo_id,
 
56
  ), f"✅ HF Model {model_repo_id} has been loaded."
57
 
58
 
59
  def load_model(
60
+ dropdown_model_id: str, hf_model_id: str, local_model_id: str
61
  ) -> Tuple[Pipeline, str]:
62
  if dropdown_model_id and not hf_model_id and not local_model_id:
63
+ dropdown_model_id = dropdown_model_id.split(" (")[0]
64
  yield None, f"Loading {dropdown_model_id}..."
65
+ yield _load_hf_model(dropdown_model_id)
66
  elif hf_model_id and not local_model_id and not dropdown_model_id:
67
  yield None, f"Loading {hf_model_id}..."
68
+ yield _load_hf_model(hf_model_id)
69
  elif local_model_id and not hf_model_id and not dropdown_model_id:
70
  yield None, f"Loading {local_model_id}..."
71
+ yield _load_local_model(local_model_id)
72
  else:
73
  yield (
74
  None,
75
  "️️⚠️ Please select or fill at least and only one of the options above",
76
  )
 
 
77
 
78
 
79
+ @spaces.GPU
80
  def transcribe(pipe: Pipeline, audio: gr.Audio) -> str:
81
  text = pipe(audio)["text"]
82
  return text
 
86
  with gr.Blocks() as demo:
87
  gr.Markdown(
88
  """ # 🗣️ Speech-to-Text Transcription
89
+ ### 1. Select which model to load from one of the options below.
90
+ ### 2. Load the model by clicking the Load model button.
91
+ ### 3. Record a message or upload an audio file.
92
+ ### 4. Click Transcribe to see the transcription generated by the model.
 
93
  """
94
  )
95
+ ### Model selection ###
 
 
 
 
96
 
97
  with gr.Row():
98
  with gr.Column():
 
128
  model = gr.State()
129
  load_model_button.click(
130
  fn=load_model,
131
+ inputs=[dropdown_model, user_model, local_model],
132
  outputs=[model, model_loaded],
133
  )
134