Spaces:

aikitty
/

testing-sandbox-huggingsound

Runtime error

Update app.py

563e0ed verified about 1 year ago

1.19 kB

	import torch
	from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
	import soundfile as sf
	import gradio as gr

	# The processor (feature extractor + tokenizer) and the CTC acoustic model
	# are both loaded from the same pretrained checkpoint.
	_CHECKPOINT = "jonatasgrosman/wav2vec2-large-xlsr-53-chinese-zh-cn"
	processor = Wav2Vec2Processor.from_pretrained(_CHECKPOINT)
	model = Wav2Vec2ForCTC.from_pretrained(_CHECKPOINT)

	def _resample_waveform(waveform, source_rate, target_rate):
	    """Resample a mono float waveform to ``target_rate`` (Fourier method).

	    Args:
	        waveform: 1-D float NumPy array of audio samples.
	        source_rate: Sample rate of ``waveform`` in Hz.
	        target_rate: Desired sample rate in Hz.

	    Returns:
	        A 1-D NumPy array at ``target_rate``. The input is returned
	        unchanged when the rates already match or the waveform is empty.
	    """
	    source_len = waveform.shape[0]
	    if source_rate == target_rate or source_len == 0:
	        return waveform

	    target_len = int(round(source_len * target_rate / source_rate))
	    if target_len == 0:
	        return waveform[:0]

	    spectrum = torch.fft.rfft(torch.from_numpy(waveform))
	    # irfft trims the spectrum when downsampling (which also removes content
	    # above the new Nyquist frequency) and zero-pads it when upsampling.
	    # The length ratio undoes irfft's 1/target_len scaling so that the
	    # amplitude of the signal is preserved.
	    resampled = torch.fft.irfft(spectrum, n=target_len) * (target_len / source_len)
	    return resampled.numpy()


	def speech_to_text(audio):
	    """Transcribe an audio file with the module-level wav2vec2 model.

	    Args:
	        audio: Path to an audio file readable by ``soundfile``, or ``None``
	            when no file was provided.

	    Returns:
	        The decoded transcription string, or ``""`` when ``audio`` is
	        ``None`` or the file contains no samples.
	    """
	    if audio is None:
	        return ""

	    # always_2d gives a (frames, channels) array for mono and multi-channel
	    # files alike, so one code path can downmix everything to mono.
	    speech, sample_rate = sf.read(audio, dtype="float32", always_2d=True)
	    speech = speech.mean(axis=1)

	    # The feature extractor rejects audio whose rate differs from the one it
	    # was configured with, so convert to that rate instead of passing the
	    # file's native rate through.
	    target_rate = processor.feature_extractor.sampling_rate
	    speech = _resample_waveform(speech, sample_rate, target_rate)
	    if speech.shape[0] == 0:
	        return ""

	    # Preprocess the waveform into model inputs
	    inputs = processor(speech, sampling_rate=target_rate, return_tensors="pt", padding=True)

	    # Inference only: no gradients needed
	    with torch.no_grad():
	        logits = model(**inputs).logits

	    # Greedy CTC decoding: most likely token per frame, then collapse to text
	    predicted_ids = torch.argmax(logits, dim=-1)
	    return processor.batch_decode(predicted_ids)[0]

	# Build the UI pieces first: an audio input that hands the callback a file
	# path, and a plain text box for the transcript.
	_audio_input = gr.Audio(type="filepath")
	_transcript_output = gr.Textbox()

	# Wire the transcription function to those components
	iface = gr.Interface(
	    fn=speech_to_text,
	    inputs=_audio_input,
	    outputs=_transcript_output,
	    title="Chinese Speech Recognition",
	    description="Upload an audio file and get the transcribed text using the wav2vec2-large-xlsr-53-chinese-zh-cn model.",
	)

	# Launch the app only when this file is executed as a script
	if __name__ == "__main__":
	    iface.launch()