Spaces: Running on Zero
from pathlib import Path | |
from typing import Tuple | |
import gradio as gr | |
from transformers import pipeline, Pipeline | |
from huggingface_hub import repo_exists | |
from speech_to_text_finetune.config import LANGUAGES_NAME_TO_ID | |
# Display names of the supported languages (keys of the project's
# language-name -> id mapping).
languages = LANGUAGES_NAME_TO_ID.keys()

# Whisper checkpoints offered in the dropdown; the leading empty string
# stands for "nothing selected".
model_ids = [
    "",
    "openai/whisper-tiny",
    "openai/whisper-small",
    "openai/whisper-medium",
    "openai/whisper-large-v3",
    "openai/whisper-large-v3-turbo",
]
def _load_local_model(model_dir: str, language: str) -> Tuple[Pipeline | None, str]:
    """Build an ASR pipeline from a fine-tuned Whisper model saved on disk.

    Args:
        model_dir: Path to a directory produced by ``save_pretrained``.
        language: Language the tokenizer should transcribe in (e.g. "English").

    Returns:
        Tuple of (pipeline, status message); the pipeline is None when the
        directory does not exist.
    """
    if not Path(model_dir).is_dir():
        return None, f"⚠️ Couldn't find local model directory: {model_dir}"

    # Imported lazily so the module stays importable/fast when only
    # Hub-hosted models are used.
    from transformers import (
        WhisperFeatureExtractor,
        WhisperForConditionalGeneration,
        WhisperTokenizer,
    )

    # NOTE: the previous version also loaded a WhisperProcessor and passed it
    # alongside the tokenizer and feature extractor — redundant, since the
    # processor is just a bundle of those two components.
    # The tokenizer carries language/task so decoding transcribes (rather
    # than translates) in the selected language.
    tokenizer = WhisperTokenizer.from_pretrained(
        model_dir, language=language, task="transcribe"
    )
    feature_extractor = WhisperFeatureExtractor.from_pretrained(model_dir)
    model = WhisperForConditionalGeneration.from_pretrained(model_dir)
    return pipeline(
        task="automatic-speech-recognition",
        model=model,
        tokenizer=tokenizer,
        feature_extractor=feature_extractor,
    ), f"✅ Local model has been loaded from {model_dir}."
def _load_hf_model(model_repo_id: str, language: str) -> Tuple[Pipeline | None, str]:
    """Build an ASR pipeline from a model hosted on the Hugging Face Hub.

    Args:
        model_repo_id: Hub repository id, e.g. "openai/whisper-tiny".
        language: Language forwarded to generation so decoding transcribes
            in the selected language.

    Returns:
        Tuple of (pipeline, status message); the pipeline is None when the
        repo cannot be found (missing, or private without a local login).
    """
    if not repo_exists(model_repo_id):
        return (
            None,
            # Fixed typo: "If its" -> "If it's".
            f"⚠️ Couldn't find {model_repo_id} on Hugging Face. If it's a private repo, make sure you are logged in locally.",
        )
    return pipeline(
        "automatic-speech-recognition",
        model=model_repo_id,
        generate_kwargs={"language": language},
    ), f"✅ HF Model {model_repo_id} has been loaded."
def load_model(
    language: str, dropdown_model_id: str, hf_model_id: str, local_model_id: str
) -> Tuple[Pipeline, str]:
    """Load an ASR pipeline from exactly one of the three model sources.

    Implemented as a generator so Gradio can display an intermediate
    "Loading..." status while the (slow) model load runs.

    Yields:
        (pipeline_or_None, status_message) pairs; on success the final
        yield carries the loaded pipeline into the ``gr.State``.
    """
    # Validate the language FIRST. Previously this check ran after the
    # model load, so its trailing ``yield None, ...`` clobbered a freshly
    # loaded pipeline in the state with None.
    if not language:
        yield None, "⚠️ Please select a language from the dropdown"
        return

    # Exactly one of the three inputs must be provided.
    provided = [s for s in (dropdown_model_id, hf_model_id, local_model_id) if s]
    if len(provided) != 1:
        yield (
            None,
            "⚠️ Please select or fill at least and only one of the three options above",
        )
        return

    if local_model_id:
        yield None, f"Loading {local_model_id}..."
        yield _load_local_model(local_model_id, language)
    else:
        # Either the dropdown selection or the pasted HF repo id.
        model_id = dropdown_model_id or hf_model_id
        yield None, f"Loading {model_id}..."
        yield _load_hf_model(model_id, language)
def transcribe(pipe: Pipeline, audio: gr.Audio) -> str:
    """Run the loaded ASR pipeline on a recorded audio clip.

    Args:
        pipe: Pipeline produced by ``load_model`` (None until a model
            has been loaded — the ``gr.State`` starts empty).
        audio: Filepath of the recording (``gr.Audio(type="filepath")``).

    Returns:
        The transcription text, or a warning when no model is loaded.
    """
    # Guard: clicking "Transcribe" before "Load model" used to raise
    # ``TypeError: 'NoneType' object is not callable``.
    if pipe is None:
        return "⚠️ Please load a model first."
    return pipe(audio)["text"]
def setup_gradio_demo():
    """Assemble the Gradio UI, wire its event listeners, and launch it."""
    with gr.Blocks() as demo:
        # Usage instructions shown at the top of the page.
        gr.Markdown(
            """ # 🗣️ Speech-to-Text Transcription
            ### 1. Select a language from the dropdown menu.
            ### 2. Select which model to load from one of the 3 options
            ### 3. Load the model by clicking the Load model button.
            ### 4. Record a message and click Transcribe to see the transcription.
            """
        )

        ### Language & Model selection ###
        language_dropdown = gr.Dropdown(
            choices=list(languages), value=None, label="Select a language"
        )
        with gr.Row():
            with gr.Column():
                preset_model_dropdown = gr.Dropdown(
                    choices=model_ids, label="Option 1: Select a model"
                )
            with gr.Column():
                hf_model_box = gr.Textbox(
                    label="Option 2: Paste HF model id",
                    placeholder="my-username/my-whisper-tiny",
                )
            with gr.Column():
                local_path_box = gr.Textbox(
                    label="Option 3: Paste local path to model directory",
                    placeholder="artifacts/my-whisper-tiny",
                )
        load_button = gr.Button("Load model")
        load_status = gr.Markdown()

        ### Transcription ###
        recording = gr.Audio(
            sources=["microphone"], type="filepath", label="Record a message"
        )
        transcribe_btn = gr.Button("Transcribe")
        transcription_box = gr.Text(label="Output")

        ### Event listeners ###
        # Holds the loaded Pipeline between button clicks.
        pipeline_state = gr.State()
        load_button.click(
            fn=load_model,
            inputs=[language_dropdown, preset_model_dropdown, hf_model_box, local_path_box],
            outputs=[pipeline_state, load_status],
        )
        transcribe_btn.click(
            fn=transcribe,
            inputs=[pipeline_state, recording],
            outputs=transcription_box,
        )
    demo.launch()
# Script entry point: build and launch the Gradio demo.
if __name__ == "__main__":
    setup_gradio_demo()