Spaces:

romas-458
/

acr

Sleeping

acr / app.py

roman

try new approach

f9a3e58 about 1 year ago

1.12 kB

	import os
	import tempfile

	import librosa
	import streamlit as st
	from transformers import AutoProcessor, AutoModelForSpeechSeq2Seq



	# Upload widget restricted to WAV files; the label is user-facing text.
	uploaded_file = st.file_uploader("上传文件", type="wav")


	def _load_processor_and_model(model_id="Yehor/whisper-small-ukrainian"):
	    """Load the speech-to-text processor and model for ``model_id``.

	    Returns:
	        A ``(processor, model)`` tuple created with ``from_pretrained``.
	    """
	    return (
	        AutoProcessor.from_pretrained(model_id),
	        AutoModelForSpeechSeq2Seq.from_pretrained(model_id),
	    )


	# Streamlit re-executes this script on every user interaction, so an
	# uncached load would re-create the processor and model on each rerun.
	# ``st.cache_resource`` keeps a single shared copy; when the installed
	# Streamlit does not provide it, fall back to the plain (uncached) loader.
	_cache_resource = getattr(st, "cache_resource", None)
	if _cache_resource is not None:
	    _load_processor_and_model = _cache_resource(_load_processor_and_model)

	processor, model = _load_processor_and_model()

	def map_to_pred(file_path):
	    """Transcribe the audio file at ``file_path`` and return normalized text.

	    Args:
	        file_path: Path of the audio file to load with ``librosa``.

	    Returns:
	        The first decoded transcription after tokenizer normalization.
	    """
	    # The processor below is told the audio is sampled at 16 kHz, so load
	    # (and resample) at that exact rate. librosa's default target rate is
	    # 22050 Hz, which would silently mismatch the declared rate.
	    sampling_rate = 16_000
	    audio, _ = librosa.load(file_path, sr=sampling_rate)

	    # Turn the waveform into model input features and generate token ids.
	    input_features = processor(
	        [audio], return_tensors="pt", sampling_rate=sampling_rate
	    ).input_features
	    generated_ids = model.generate(inputs=input_features)

	    # NOTE(review): the text is normalized twice (normalize=True here and
	    # tokenizer._normalize below); kept as-is -- confirm it is intentional.
	    transcription = processor.batch_decode(
	        generated_ids, normalize=True, skip_special_tokens=True
	    )
	    text = processor.tokenizer._normalize(transcription[0])

	    return text
	if uploaded_file is not None:
	    # Persist the upload to a uniquely named temporary file so that
	    # concurrent sessions cannot overwrite each other's audio (a fixed
	    # './temp.wav' path is shared by every session of the app).
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        tmp.write(uploaded_file.getbuffer())
	        file_path = tmp.name

	    try:
	        text = map_to_pred(file_path)
	    finally:
	        # Always remove the temporary file, even if transcription fails.
	        os.remove(file_path)

	    # display results
	    st.write('Input audio:', uploaded_file.name)
	    st.write('Predicted standard:', text)