import whisper
import gradio as gr
import torch
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from huggingface_hub import snapshot_download
from transformers import WhisperConfig, WhisperForConditionalGeneration, WhisperProcessor
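
# Let Accelerate pick a placement for each submodule: GPUs first, then CPU, then disk offload.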
device_map = "auto"
print(f"Using Accelerate device map: {device_map}")

model_name = "openai/whisper-tiny"
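
# Build the model skeleton on the meta device: no memory is allocated for the weights yet.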
config = WhisperConfig.from_pretrained(model_name)
with init_empty_weights():
    whisper_model = WhisperForConditionalGeneration(config)

# The processor bundles Whisper's feature extractor (log-Mel spectrograms) and its tokenizer.
processor = WhisperProcessor.from_pretrained(model_name)
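
# Fetch the checkpoint files locally, then load the real weights straight onto their
# assigned devices (load_checkpoint_and_dispatch accepts a folder holding a single
# pytorch_model.bin / model.safetensors or a sharded index).
# Note: float16 assumes a GPU is available; use torch.float32 on CPU-only machines.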
checkpoint_dir = snapshot_download(model_name)
whisper_model = load_checkpoint_and_dispatch(
    whisper_model,
    checkpoint_dir,
    device_map=device_map,
    dtype=torch.float16,
)
whisper_model.eval()
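
# Inference path: raw audio -> log-Mel features -> autoregressive decoding -> text.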
def transcribe(audio):
    # Gradio passes the recording as a filepath; whisper.load_audio resamples it to 16 kHz mono.
    speech = whisper.load_audio(audio)
    inputs = processor(speech, sampling_rate=16000, return_tensors="pt")
    input_features = inputs.input_features.to(whisper_model.device, dtype=torch.float16)
    predicted_ids = whisper_model.generate(input_features)
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

demo = gr.Interface(
    fn=transcribe,
    # Recent Gradio releases take `sources` (a list) rather than the old `source` argument.
    inputs=gr.Audio(sources=["microphone"], type="filepath", label="Speak into the microphone"),
    outputs=gr.Textbox(label="Transcription"),
    title="Whisper Speech-to-Text with Accelerate",
    description="Record audio with your microphone and get a transcription from Whisper, dispatched across devices by Accelerate.",
)
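
# launch() starts a local web server; pass share=True for a temporary public URL.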
demo.launch()