Spaces:

nimool
/

gbn_test

Sleeping

App Files Files Community

gbn_test / app.py

nimool

Update app.py

645c5d6 almost 2 years ago

raw

history blame

2.17 kB

	import soundfile as sf
	import torch
	from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor
	import gradio as gr
	import sox
	import subprocess
	from fuzzywuzzy import fuzz

	def read_file_and_process(wav_file):
	    """Resample an audio file to 16 kHz mono and run it through the processor.

	    The resampled copy is written next to the input, using the input path
	    without its extension plus the suffix ``16k.wav`` (``rec.wav`` becomes
	    ``rec16k.wav``).

	    Args:
	        wav_file: Path of the audio file to transcribe.

	    Returns:
	        The output of the module-level ``processor`` called with
	        ``return_tensors="pt"`` and ``padding=True``; ``parse`` unpacks it
	        as keyword arguments for the model.
	    """
	    # Imported locally so this function does not depend on the file's
	    # top-level import list.
	    import os

	    # Strip only the final extension. Splitting on the first "." anywhere in
	    # the path breaks on dotted directory names ("/tmp/run.1/rec.wav") and on
	    # relative paths such as "./rec.wav".
	    stem, _extension = os.path.splitext(wav_file)
	    filename_16k = stem + "16k.wav"

	    resampler(wav_file, filename_16k)

	    # sf.read also returns the sample rate; it is discarded because the
	    # resampler was asked for 16 kHz and that rate is passed explicitly below.
	    speech, _ = sf.read(filename_16k)
	    inputs = processor(speech, sampling_rate=16_000, return_tensors="pt", padding=True)

	    return inputs


	def resampler(input_file_path, output_file_path):
	    """Convert an audio file to 16 kHz, single-channel audio using ffmpeg.

	    Args:
	        input_file_path: Path of the source audio file.
	        output_file_path: Path the converted file is written to. An existing
	            file at this path is overwritten.

	    Raises:
	        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.

	    The ffmpeg exit status is not checked, matching the previous behaviour;
	    a failed conversion surfaces when the caller tries to read the output.
	    """
	    # Build an argument list and run it without a shell, so paths containing
	    # spaces, quotes or shell metacharacters are passed to ffmpeg verbatim
	    # and are never interpreted as shell syntax.
	    command = [
	        "ffmpeg",
	        "-hide_banner",
	        "-loglevel", "panic",
	        # Overwrite an existing output file; without this ffmpeg asks for
	        # confirmation on stdin (or refuses), leaving a stale file behind.
	        "-y",
	        "-i", str(input_file_path),
	        "-ar", "16000",
	        "-ac", "1",
	        "-bits_per_raw_sample", "16",
	        "-vn",
	        str(output_file_path),
	    ]
	    subprocess.call(command)


	def parse_transcription(logits):
	    """Turn model logits into text.

	    Takes the highest-scoring token id along the last axis, then decodes the
	    ids of the first item only with the module-level ``processor``, dropping
	    special tokens.

	    Args:
	        logits: Tensor of model output scores.
	            NOTE(review): assumed to carry a leading batch axis, since only
	            index 0 is decoded -- confirm against the caller.

	    Returns:
	        Whatever ``processor.decode`` returns for the first item.
	    """
	    first_item_ids = torch.argmax(logits, dim=-1)[0]
	    return processor.decode(first_item_ids, skip_special_tokens=True)


	def parse(wav_file):
	    """Transcribe the audio file at ``wav_file``.

	    Args:
	        wav_file: Path of the recorded audio file, or ``None`` / an empty
	            string when no audio is available.

	    Returns:
	        The transcription produced by ``parse_transcription``, or an empty
	        string when there is no audio to transcribe.
	    """
	    # The UI callback can fire before anything has been recorded; return an
	    # empty transcription instead of failing on a missing path.
	    if not wav_file:
	        return ""

	    input_values = read_file_and_process(wav_file)

	    # Inference only: gradient tracking is not needed.
	    with torch.no_grad():
	        logits = model(**input_values).logits

	    return parse_transcription(logits)


	# Checkpoint identifier shared by the processor and the model below.
	model_id = "jonatasgrosman/wav2vec2-large-xlsr-53-persian"

	# Both objects are created once at import time and used as module-level
	# globals by the functions above (processor first, then model).
	processor, model = (
	    Wav2Vec2Processor.from_pretrained(model_id),
	    Wav2Vec2ForCTC.from_pretrained(model_id),
	)


	# Input component: microphone audio, handed to the callback as a file path.
	input_ = gr.Audio(type="filepath", source="microphone")

	# Output component: a five-line, right-aligned text box with a copy button.
	# The placeholder is Persian for roughly "the written text of your speech".
	txtbox = gr.Textbox(
	    lines=5,
	    label="persian text output:",
	    show_label=True,
	    placeholder="متن نوشتاری گفتار شما",
	    text_align="right",
	    container=True,
	    show_copy_button=True,
	)

	# Static text shown on the demo page: heading, short description, and an
	# HTML footer linking to the author's GitHub profile.
	title = "Speech-to-Text (persian)"
	# Fixed the user-visible typo "prsian" -> "persian".
	description = "Upload a persian audio, and let AI do the hard work of transcribing."
	article = "<p style='text-align: center'><a href='https://github.com/nimaprgrmr'>Large-Scale Self- and Semi-Supervised Learning for Speech Translation</a></p>"



	# Assemble the demo: `parse` is the callback, fed by the audio input and
	# writing into the text box, with the page text defined above.
	demo = gr.Interface(
	    fn=parse,
	    inputs=input_,
	    outputs=txtbox,
	    title=title,
	    description=description,
	    article=article,
	    streaming=True,
	    interactive=True,
	    analytics_enabled=False,
	    show_tips=False,
	    enable_queue=True,
	)

	# Start the app when the script runs; share=True is passed through to
	# Gradio's launch().
	demo.launch(share=True)