Spaces:

Walid-Ahmed
/

summarize_audio_file

Sleeping

File size: 1,619 Bytes

a2a46d0
ee9acb0
a2a46d0
dc1d260
 
 
a2a46d0
dc1d260
 
 
b94d057
dc1d260
 
b94d057
dc1d260
 
a2a46d0
dc1d260
 
 
a2a46d0
dc1d260
 
a2a46d0
dc1d260
 
 
 
 
 
a2a46d0
dc1d260
 
 
 
 
 
 
 
 
 
 
 
 
a2a46d0
 
dc1d260



import whisper
import gradio as gr
from accelerate import init_empty_weights, load_checkpoint_and_dispatch
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Select the compute device. This is ordinary PyTorch device placement;
# nothing in this script configures DeepSpeed ZeRO.
from accelerate.utils import set_module_tensor_to_device
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# openai-whisper identifies its checkpoints by short names ("tiny", "base",
# "small", ...) or by a local checkpoint path. A Hugging Face Hub id such as
# "openai/whisper-tiny" is not accepted by whisper.load_model.
model_name = "tiny"

# Load the real weights directly onto the chosen device.
# - Do not wrap this call in accelerate.init_empty_weights(): that creates
#   the parameters on the "meta" device, so no actual weights are loaded.
# - accelerate.load_checkpoint_and_dispatch() is not usable here either: it
#   requires a `checkpoint` path, which this script does not have.
# - No separate tokenizer is needed: the only call made on this model in the
#   script is whisper_model.transcribe(), which takes the audio file path.
whisper_model = whisper.load_model(model_name, device=device)

# Define the transcription function
def transcribe(audio):
    """Transcribe a recorded audio file with the module-level Whisper model.

    Args:
        audio: Path to the audio file to transcribe, as delivered by the
            Gradio ``Audio`` input (configured with ``type="filepath"``).
            ``None`` means no recording was provided.

    Returns:
        The transcribed text, or an empty string when ``audio`` is ``None``.
    """
    # Submitting the form without recording anything yields None; return
    # early instead of handing None to Whisper, which would raise.
    if audio is None:
        return ""

    # Perform transcription using the Whisper model
    result = whisper_model.transcribe(audio)
    return result["text"]

# Build the microphone input.
# Gradio 4 replaced the `source="microphone"` keyword of gr.Audio with
# `sources=["microphone"]`; choose the spelling that matches the installed
# major version so the app starts on both 3.x and 4.x+.
_gradio_major = int(gr.__version__.split(".")[0])
if _gradio_major >= 4:
    audio_input = gr.Audio(
        sources=["microphone"],
        type="filepath",
        label="Speak into the microphone",
    )
else:
    audio_input = gr.Audio(
        source="microphone",
        type="filepath",
        label="Speak into the microphone",
    )

# Create the Gradio interface
# NOTE(review): the title and description mention ZeRO, but no DeepSpeed/ZeRO
# API is called in the visible code - confirm the wording with the owner.
demo = gr.Interface(
    fn=transcribe,  # The function to be called for transcription
    inputs=audio_input,  # Input audio (path to the recorded file)
    outputs=gr.Textbox(label="Transcription"),  # Output transcription
    title="Whisper Speech-to-Text with ZeRO",  # Title of the interface
    description="Record audio using your microphone and get a transcription using the Whisper model optimized by ZeRO."
)

# Launch the Gradio interface only when run as a script, so that importing
# this module (e.g. for tests or reload tooling) does not start a server.
if __name__ == "__main__":
    demo.launch()