import os
from pathlib import Path
from typing import Iterator, Tuple
import gradio as gr
import spaces
from transformers import pipeline, Pipeline
from huggingface_hub import repo_exists
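
# IS_HF_SPACE is expected to be set in the hosted Space's environment; it is
# used below to hide the local-path option (Option 3) when running on the Hub.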
is_hf_space = os.getenv("IS_HF_SPACE")

model_ids = [
"",
"mozilla-ai/whisper-small-gl (Galician)",
"mozilla-ai/whisper-small-el (Greek)",
"openai/whisper-tiny (Multilingual)",
"openai/whisper-small (Multilingual)",
"openai/whisper-medium (Multilingual)",
"openai/whisper-large-v3 (Multilingual)",
"openai/whisper-large-v3-turbo (Multilingual)",
]

def _load_local_model(model_dir: str) -> Tuple[Pipeline | None, str]:
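    """Load a Whisper pipeline from a local model directory.

    Returns the pipeline and a status message, or None plus a warning
    if the directory does not exist.
    """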
if not Path(model_dir).is_dir():
return None, f"⚠️ Couldn't find local model directory: {model_dir}"
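    # Import the Whisper classes lazily; the Hub-hosted flow never needs them.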
from transformers import (
WhisperProcessor,
WhisperTokenizer,
WhisperFeatureExtractor,
WhisperForConditionalGeneration,
)
processor = WhisperProcessor.from_pretrained(model_dir)
tokenizer = WhisperTokenizer.from_pretrained(model_dir, task="transcribe")
feature_extractor = WhisperFeatureExtractor.from_pretrained(model_dir)
model = WhisperForConditionalGeneration.from_pretrained(model_dir)
return pipeline(
task="automatic-speech-recognition",
model=model,
processor=processor,
tokenizer=tokenizer,
feature_extractor=feature_extractor,
), f"✅ Local model has been loaded from {model_dir}."

def _load_hf_model(model_repo_id: str) -> Tuple[Pipeline | None, str]:
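    """Load a Whisper pipeline from a repo on the Hugging Face Hub.

    Returns the pipeline and a status message, or None plus a warning
    if the repo cannot be found.
    """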
if not repo_exists(model_repo_id):
return (
None,
f"⚠️ Couldn't find {model_repo_id} on Hugging Face. If its a private repo, make sure you are logged in locally.",
)
return pipeline(
"automatic-speech-recognition",
model=model_repo_id,
), f"✅ HF Model {model_repo_id} has been loaded."

def load_model(
    dropdown_model_id: str, hf_model_id: str, local_model_id: str
) -> Iterator[Tuple[Pipeline | None, str]]:
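    """Load a pipeline from exactly one of the three model sources.

    Yields an interim (None, "Loading...") pair first so the UI shows
    progress, then the loaded pipeline (or None) with a status message.
    """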
if dropdown_model_id and not hf_model_id and not local_model_id:
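        # Strip the " (Language)" suffix that is only there for display.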
dropdown_model_id = dropdown_model_id.split(" (")[0]
yield None, f"Loading {dropdown_model_id}..."
yield _load_hf_model(dropdown_model_id)
elif hf_model_id and not local_model_id and not dropdown_model_id:
yield None, f"Loading {hf_model_id}..."
yield _load_hf_model(hf_model_id)
elif local_model_id and not hf_model_id and not dropdown_model_id:
yield None, f"Loading {local_model_id}..."
yield _load_local_model(local_model_id)
else:
yield (
None,
"️️⚠️ Please select or fill at least and only one of the options above",
)
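
# On a ZeroGPU Space, @spaces.GPU requests a GPU for the duration of the call.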
@spaces.GPU
def transcribe(pipe: Pipeline | None, audio: str) -> str:
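    """Run speech-to-text over the recorded or uploaded audio file path."""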
    if pipe is None:
        return "⚠️ Please load a model before transcribing."
    return pipe(audio)["text"]

def setup_gradio_demo():
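    """Build and launch the Gradio Blocks UI for loading models and transcribing audio."""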
with gr.Blocks() as demo:
gr.Markdown(
""" # 🗣️ Speech-to-Text Transcription
### 1. Select which model to load from one of the options below.
### 2. Load the model by clicking the Load model button.
### 3. Record a message or upload an audio file.
### 4. Click Transcribe to see the transcription generated by the model.
"""
)
### Model selection ###
with gr.Row():
with gr.Column():
dropdown_model = gr.Dropdown(
choices=model_ids, label="Option 1: Select a model"
)
with gr.Column():
user_model = gr.Textbox(
label="Option 2: Paste HF model id",
placeholder="my-username/my-whisper-tiny",
)
with gr.Column(visible=not is_hf_space):
local_model = gr.Textbox(
label="Option 3: Paste local path to model directory",
placeholder="artifacts/my-whisper-tiny",
)
load_model_button = gr.Button("Load model")
model_loaded = gr.Markdown()
### Transcription ###
audio_input = gr.Audio(
sources=["microphone", "upload"],
type="filepath",
label="Record a message / Upload audio file",
show_download_button=True,
max_length=30,
)
transcribe_button = gr.Button("Transcribe")
transcribe_output = gr.Text(label="Output")
### Event listeners ###
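        # gr.State keeps the loaded pipeline across events without reloading it.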
model = gr.State()
load_model_button.click(
fn=load_model,
inputs=[dropdown_model, user_model, local_model],
outputs=[model, model_loaded],
)
transcribe_button.click(
fn=transcribe, inputs=[model, audio_input], outputs=transcribe_output
)
demo.launch()

if __name__ == "__main__":
setup_gradio_demo()