import torch
import gradio as gr
import torchaudio
from transformers import AutoModelForCTC, AutoProcessor
from quanto import qint8, quantize, freeze  # also shipped as optimum.quanto in newer releases
# Load the CTC model and processor
model_name = "cdactvm/w2v-bert-punjabi"
model = AutoModelForCTC.from_pretrained(model_name)
processor = AutoProcessor.from_pretrained(model_name)

# Quantize the weights to int8 and freeze them for inference
quantize(model, weights=qint8, activations=None)
freeze(model)
model.eval()
# Audio transcription function
def transcribe(audio):
    waveform, sample_rate = torchaudio.load(audio)

    # Downmix multi-channel audio to mono
    if waveform.shape[0] > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Ensure 16 kHz sample rate
    if sample_rate != 16000:
        waveform = torchaudio.transforms.Resample(sample_rate, 16000)(waveform)

    # Process audio (the feature extractor expects a 1-D array)
    inputs = processor(waveform.squeeze(0).numpy(), sampling_rate=16000, return_tensors="pt")

    # Run inference
    with torch.no_grad():
        logits = model(**inputs).logits

    # Decode the CTC output to text
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]
    return transcription
# Gradio UI
iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["upload"], type="filepath"),  # `source=` was renamed to `sources=` in Gradio 4
    outputs="text",
    title="Punjabi Speech Recognition",
    description="Upload an audio file and get a Punjabi transcription using a quantized model.",
)

iface.launch()
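
# Note (assumption, not part of the original script): when this runs as a Hugging Face Space,
# the non-preinstalled dependencies above must be declared in a requirements.txt next to app.py.
# A minimal, unpinned sketch based solely on the imports in this file; exact package names and
# versions for the Space are not known here:
#
#   torch
#   torchaudio
#   transformers
#   gradio
#   quanto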