ASR_for_Luxembourgish_w2v

Runtime error

App Files Files Community

ASR_for_Luxembourgish_w2v / app.py

pgilles

Update app.py

81c2355 over 2 years ago

raw

history blame contribute delete

2.71 kB

	# from: https://gradio.app/real_time_speech_recognition/

	from transformers import pipeline, Wav2Vec2CTCTokenizer, Wav2Vec2ForCTC, Wav2Vec2ProcessorWithLM
	import pyctcdecode
	import kenlm
	import torch
	import gradio as gr
	import librosa
	import os
	import time

	# Model setup: the Hub access token is taken from the environment and handed
	# to every loader below.
	token_key = os.getenv("HUGGING_FACE_HUB_TOKEN")
	model_name = "unilux/Wav2Vec2-large-xlsr-1b-LUXEMBOURGISH33-with-LM"

	# Tokenizer, acoustic model and LM-backed processor all come from the same
	# checkpoint.
	tokenizer = Wav2Vec2CTCTokenizer.from_pretrained(model_name, use_auth_token=token_key)
	model = Wav2Vec2ForCTC.from_pretrained(model_name, use_auth_token=token_key)
	processor = Wav2Vec2ProcessorWithLM.from_pretrained(model_name, use_auth_token=token_key)

	# Assemble the speech-recognition pipeline from the pieces loaded above; the
	# feature extractor and the decoder are taken from the processor.
	pipe = pipeline(
	    "automatic-speech-recognition",
	    model=model,
	    tokenizer=tokenizer,
	    feature_extractor=processor.feature_extractor,
	    decoder=processor.decoder,
	    use_auth_token=token_key,
	)

	def load_data(input_file, sampling_rate=16_000):
	    """Load an audio file as a mono signal resampled to ``sampling_rate`` Hz.

	    Args:
	        input_file: Audio source handed unchanged to ``librosa.load``.
	        sampling_rate: Target sample rate in Hz. Defaults to 16 kHz, the
	            rate this function has always resampled to.

	    Returns:
	        The audio data returned by ``librosa.load``; the sample rate it also
	        returns is discarded because it equals ``sampling_rate``.
	    """
	    # Passing ``sr`` makes librosa resample while loading, and ``mono=True``
	    # asks it for a single-channel signal.
	    speech, _ = librosa.load(input_file, sr=sampling_rate, mono=True)
	    return speech

	def asr_pipe(input_file, input_file_microphone, chunks):
	    """Transcribe one audio input with the module-level ``pipe``.

	    Args:
	        input_file: Uploaded audio (a file path, as the UI is configured with
	            ``type='filepath'``), or a falsy value when nothing was uploaded.
	        input_file_microphone: Microphone recording; when present it takes
	            precedence over ``input_file``.
	        chunks: Forwarded to the pipeline as ``chunk_length_s``.

	    Returns:
	        The ``"text"`` entry of the pipeline result.

	    Raises:
	        ValueError: If neither an uploaded file nor a recording was provided.
	    """
	    # The microphone recording wins whenever both inputs are filled in.
	    audio_path = input_file_microphone or input_file
	    if not audio_path:
	        # Fail early with a readable message instead of passing None to the
	        # pipeline.
	        raise ValueError(
	            "No audio provided: upload a file or record with the microphone."
	        )
	    return pipe(audio_path, chunk_length_s=chunks)["text"]

	# Input widgets, in the positional order of asr_pipe's parameters: uploaded
	# file, microphone recording, chunk length.
	# The top-level component classes are used throughout (as gr.Slider already
	# was) instead of the deprecated gr.inputs / gr.outputs namespaces.
	# NOTE(review): the legacy `optional=True` flag is dropped because gr.Audio
	# has no such parameter; this assumes Gradio 3.x, where the legacy wrapper
	# is understood to ignore it -- confirm against the pinned Gradio version.
	inputs = [
	    gr.Audio(source="upload", type="filepath", label="Eng Audio-Datei eroplueden..."),
	    gr.Audio(source="microphone", type="filepath", label="... oder direkt mam Mikro ophuelen"),
	    gr.Slider(minimum=3, maximum=32, value=29, step=0.5, label="Chunk Length"),
	]

	outputs = [gr.Textbox(label="Erkannten Text")]

	# Each example row supplies one value per input widget: upload path,
	# microphone path, chunk length.
	samples = [
	    ["Chamber2022_1.wav", "Chamber2022_1.wav", 8],
	    ["Chamber2022_2.wav", "Chamber2022_2.wav", 8],
	    ["Chamber2022_3.wav", "Chamber2022_3.wav", 8],
	    ["Erlieft-a-Verzielt.wav", "Erlieft-a-Verzielt.wav", 8],
	]

	# Build the interface and start serving it immediately.
	gr.Interface(
	    fn=asr_pipe,
	    inputs=inputs,
	    outputs=outputs,
	    title="Sproocherkennung fir d'Lëtzebuergescht @uni.lu, based on wav2vec2 XLS-R-1B",
	    description="Dës App convertéiert Är geschwate Sprooch an de (méi oder manner richtegen ;-)) Text!",
	    examples=samples,
	    examples_per_page=10,
	    article="Beschreiwung: Dir kënnt Iech selwer iwwer de Mikro ophuelen, eng Datei eroplueden oder e Beispill auswielen. Dëse Modell ass trainéiert mam wav2vec 2.0-Algorithmus vu Meta mat enger Milliard Parametern (wav2vec2-large-xls-r-1B).",
	    theme="default",
	).launch(share=False, show_error=True)