Spaces:
Runtime error
Runtime error
File size: 1,337 Bytes
aca48d5 e3e59a6 aca48d5 e3e59a6 aca48d5 da550b3 e3e59a6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import gradio as gr
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
import torch
import soundfile as sf
# NOTE: "facebook/wav2vec2-large-xlsr-53" is only the self-supervised
# pretrained checkpoint. It ships no tokenizer/vocabulary and has no trained
# CTC head, so Wav2Vec2Processor.from_pretrained() cannot load it and it is
# not usable for transcription as-is. Load a checkpoint that has been
# fine-tuned for speech recognition instead. Swap MODEL_ID for a
# language-specific fine-tune if English is not the target language.
MODEL_ID = "facebook/wav2vec2-base-960h"

# Loaded once at import time so every request reuses the same weights.
processor = Wav2Vec2Processor.from_pretrained(MODEL_ID)
model = Wav2Vec2ForCTC.from_pretrained(MODEL_ID)
def _to_model_waveform(samples, sample_rate, target_rate):
    """Return *samples* as a mono waveform sampled at *target_rate* Hz.

    Args:
        samples: Array read by ``soundfile`` - 1-D for mono files, or
            ``(frames, channels)`` for multi-channel files.
        sample_rate: Sample rate of *samples* in Hz.
        target_rate: Sample rate the model expects, in Hz.

    Returns:
        A 1-D array at *target_rate* Hz.
    """
    # The model takes a single 1-D waveform, so average the channels of
    # stereo / multi-channel recordings.
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    if sample_rate == target_rate or samples.shape[0] == 0:
        return samples

    # Linear-interpolation resampling via torch, which the file already
    # imports; this avoids adding a dedicated resampling dependency.
    target_len = max(1, round(samples.shape[0] * target_rate / sample_rate))
    waveform = torch.as_tensor(samples, dtype=torch.float32).reshape(1, 1, -1)
    resampled = torch.nn.functional.interpolate(
        waveform, size=target_len, mode="linear", align_corners=False
    )
    return resampled.reshape(-1).numpy()


def transcribe_audio(audio):
    """Transcribe an uploaded audio file with the Wav2Vec2 CTC model.

    Args:
        audio: Path of the uploaded file (Gradio ``type="filepath"``), a
            file-like object exposing the path as ``.name`` (legacy Gradio
            ``type="file"``), or ``None`` when nothing was uploaded.

    Returns:
        The decoded transcription, or an empty string when there is no
        audio to transcribe.
    """
    # Gradio passes None when the user submits without uploading a file.
    if audio is None:
        return ""

    audio_path = audio if isinstance(audio, str) else audio.name
    samples, sample_rate = sf.read(audio_path, dtype="float32")
    if samples.shape[0] == 0:
        return ""

    # Feed the model audio at the rate its feature extractor was configured
    # for, and tell the processor that rate so it can validate it.
    target_rate = processor.feature_extractor.sampling_rate
    waveform = _to_model_waveform(samples, sample_rate, target_rate)
    input_values = processor(
        waveform, sampling_rate=target_rate, return_tensors="pt"
    ).input_values

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(input_values).logits

    # Greedy CTC decoding: most likely token per frame, then collapse to text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)
    return transcription[0]


def _build_audio_input():
    """Create the upload-only audio input component.

    Gradio 4 replaced the ``source`` argument with ``sources`` and accepts
    only ``"numpy"`` or ``"filepath"`` for ``type``; fall back to the older
    spelling if this Gradio version rejects ``sources``.
    """
    try:
        return gr.Audio(sources=["upload"], type="filepath")
    except TypeError:
        return gr.Audio(source="upload", type="filepath")


# Create a Gradio interface for users to upload audio files
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=_build_audio_input(),
    outputs="text",
    title="Voice Login System",
    description="Upload an audio file for transcription using Wav2Vec2 model.",
)
iface.launch()
|