Spaces:
Runtime error
Runtime error
File size: 1,434 Bytes
4e21035 fb45ec7 4e21035 6594bab 4e21035 6594bab 4e21035 6594bab 4e21035 6594bab 4e21035 6594bab 4e21035 6594bab 4e21035 6594bab 4e21035 6594bab 4e21035 6594bab 4e21035 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import os
import gradio as gr
import whisper
import librosa
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2Tokenizer
# Pick the torch device once at import time: the GPU when CUDA is usable,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
# Whisper models already loaded in this process, keyed by checkpoint name.
# Loading a checkpoint is expensive, so it is done once instead of per request.
_WHISPER_MODELS = {}


def _get_whisper_model(name="base"):
    """Return the Whisper model called *name*, loading it on first use."""
    model = _WHISPER_MODELS.get(name)
    if model is None:
        model = whisper.load_model(name)
        _WHISPER_MODELS[name] = model
    return model


def audio_to_text(audio):
    """Transcribe an audio file to text with the Whisper "base" model.

    Args:
        audio: Path of the audio file to transcribe. ``None`` or an empty
            string (nothing was provided) is accepted.

    Returns:
        The transcribed text, or ``""`` when no audio was provided.
    """
    # Guard against a missing input so it never reaches whisper.load_audio,
    # which expects a real file path.
    if not audio:
        return ""
    samples = whisper.load_audio(audio)
    result = _get_whisper_model("base").transcribe(samples)
    return result["text"]
def preprocess(audio, model_save_path="model_save", model_name="wav2vec2_osr_version_1"):
    """Run the locally saved wav2vec2 model on an audio file and return its logits.

    Args:
        audio: Audio file to load; passed straight to ``librosa.load``.
        model_save_path: Directory holding the saved model and its processor.
        model_name: Base name of the artifacts; the model is read from
            ``<model_name>.pt`` and the processor from ``<model_name>_vocab``.

    Returns:
        The ``logits`` attribute of the model output for the given audio,
        computed without gradient tracking.
    """
    # Load the audio resampled to 16 kHz; ``rate`` is therefore 16000.
    speech, rate = librosa.load(audio, sr=16000)
    model_path = os.path.join(model_save_path, model_name + ".pt")
    pipeline_path = os.path.join(model_save_path, model_name + "_vocab")

    # Credentials are read from the environment so that no secret lives in the
    # source tree. When neither variable is set the token is None, which is
    # sufficient for loading from a local directory.
    access_token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
    processor = Wav2Vec2Processor.from_pretrained(pipeline_path, use_auth_token=access_token)

    # NOTE(security): torch.load unpickles the file, which can execute
    # arbitrary code -- only point this at checkpoints you trust.
    # map_location keeps a checkpoint saved on a GPU loadable on CPU-only hosts.
    model = torch.load(model_path, map_location=device)
    # The inputs are moved to ``device`` below, so the model must live there too.
    model.to(device)
    model.eval()

    input_values = processor(speech, sampling_rate=rate, return_tensors="pt").input_values.to(device)
    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_values).logits
    return logits
# Gradio UI: the user uploads an audio file, which is handed to audio_to_text
# as a file path, and the returned transcription is shown as text.
# NOTE(review): ``source=`` is the Gradio 3.x keyword; newer Gradio releases
# take ``sources=[...]`` instead -- confirm against the pinned version.
demo = gr.Interface(
    fn=audio_to_text,
    inputs=gr.Audio(source="upload", type="filepath"),
    outputs="text",
    examples=[["example.flac"]],
)
demo.launch()