Spaces:
Sleeping
Sleeping
File size: 1,619 Bytes
a2a46d0 ee9acb0 a2a46d0 dc1d260 a2a46d0 dc1d260 b94d057 dc1d260 b94d057 dc1d260 a2a46d0 dc1d260 a2a46d0 dc1d260 a2a46d0 dc1d260 a2a46d0 dc1d260 a2a46d0 dc1d260 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
import whisper
import gradio as gr
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
# Accelerate utility for placing individual tensors on a device (this is Accelerate, not DeepSpeed ZeRO)
from accelerate.utils import set_module_tensor_to_device
import torch
# Placement policy label: "auto" means the device is chosen automatically at
# load time (see the whisper.load_model call below).
# NOTE(review): the log line mentions ZeRO, but ZeRO is a DeepSpeed feature and
# nothing in this script configures DeepSpeed -- consider rewording the message.
device_map = "auto"
print(f"Using ZeRO-powered device map: {device_map}")

# Hugging Face Hub repository id; this is the form AutoTokenizer expects.
model_name = "openai/whisper-tiny"

# The openai-whisper package identifies checkpoints by short names ("tiny",
# "base", ...) or by a local checkpoint path, not by Hub repository ids, so the
# short name is derived from the Hub id ("openai/whisper-tiny" -> "tiny").
whisper_checkpoint = model_name.rsplit("whisper-", 1)[-1]

# Load the Whisper model with real weights. Leaving `device` unset lets
# openai-whisper pick CUDA when it is available and fall back to CPU otherwise.
# The earlier init_empty_weights() + load_checkpoint_and_dispatch() sequence
# could not run: load_checkpoint_and_dispatch() requires a `checkpoint` path
# that was never supplied, and whisper.load_model() already loads the weights
# itself, which conflicts with meta-device initialisation.
# Half precision is not forced on the weights; presumably transcribe()'s own
# fp16 option handles that on GPU -- confirm against the installed version.
whisper_model = whisper.load_model(whisper_checkpoint)

# Load tokenizer
# NOTE(review): whisper_model.transcribe() performs its own tokenization; this
# object is kept so the module-level name `tokenizer` remains available.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Define the transcription function
def transcribe(audio):
    """Transcribe a recorded audio clip to text with the Whisper model.

    Args:
        audio: Path to the recorded audio file as handed over by the Gradio
            Audio component, or None when nothing was recorded.

    Returns:
        The transcribed text, or an empty string when no audio was supplied.
    """
    # Gradio passes None when the user submits without recording anything;
    # forwarding that to Whisper would raise instead of showing an empty result.
    if audio is None or (isinstance(audio, str) and not audio):
        return ""

    # Perform transcription using the Whisper model
    result = whisper_model.transcribe(audio)
    return result['text']
# Build the Gradio UI: microphone recording in, transcribed text out.
# NOTE(review): Gradio 4 replaced Audio's `source` argument with `sources`
# (a list) -- confirm which Gradio version this app is pinned to.
_mic_input = gr.Audio(
    source="microphone",
    type="filepath",
    label="Speak into the microphone",
)
_text_output = gr.Textbox(label="Transcription")

demo = gr.Interface(
    fn=transcribe,  # Called with the recorded file path on submit
    inputs=_mic_input,
    outputs=_text_output,
    title="Whisper Speech-to-Text with ZeRO",
    description="Record audio using your microphone and get a transcription using the Whisper model optimized by ZeRO.",
)

# Start the Gradio app
demo.launch()
|