# Streamlit demo: transcribe an uploaded WAV file with the
# "Yehor/whisper-small-ukrainian" speech-to-text model.
import streamlit as st
import librosa
from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq
@st.cache_resource
def _load_asr():
    """Load and cache the Whisper processor/model pair.

    ``st.cache_resource`` makes the download/instantiation happen once per
    server process instead of on every Streamlit rerun (each widget
    interaction re-executes the whole script top to bottom).
    """
    processor = AutoProcessor.from_pretrained("Yehor/whisper-small-ukrainian")
    model = AutoModelForSpeechSeq2Seq.from_pretrained("Yehor/whisper-small-ukrainian")
    return processor, model


uploaded_file = st.file_uploader("上传文件", type="wav")
# Keep the original module-level names so the rest of the file is unchanged.
processor, model = _load_asr()
def map_to_pred(file_path):
    """Transcribe the WAV file at ``file_path`` and return normalized text.

    Uses the module-level ``processor`` and ``model``; returns the
    normalized transcription string for the (single-item) batch.
    """
    # BUG FIX: request 16 kHz explicitly. librosa.load defaults to
    # sr=22050, but the processor call below declares sampling_rate=16_000;
    # feeding 22.05 kHz samples labeled as 16 kHz degrades transcription.
    audio, _ = librosa.load(file_path, sr=16_000)
    # Convert the raw waveform into log-mel input features (batch of one).
    input_features = processor(
        [audio], return_tensors="pt", sampling_rate=16_000
    ).input_features
    # Generate token ids, then decode with Whisper's text normalization.
    generated_ids = model.generate(inputs=input_features)
    transcription = processor.batch_decode(
        generated_ids, normalize=True, skip_special_tokens=True
    )
    # NOTE(review): batch_decode(normalize=True) should already normalize;
    # the private tokenizer._normalize call is kept so output stays
    # byte-identical, but it relies on a non-public API — consider removing.
    text = processor.tokenizer._normalize(transcription[0])
    return text
if uploaded_file is not None:
    # librosa.load wants a path (or file-like object), while Streamlit
    # hands us an in-memory UploadedFile — spill its bytes to a temp file.
    file_path = './temp.wav'
    with open(file_path, 'wb') as f:
        f.write(uploaded_file.getbuffer())
    text = map_to_pred(file_path)
    # display results
    st.write('Input audio:', uploaded_file.name)
    # FIX: removed stray trailing "|" (copy/extraction artifact) that made
    # this line a syntax error.
    st.write('Predicted standard:', text)