Spaces:

Shubham09
/

samplewhisper

Runtime error

App Files Files Community

samplewhisper / app.py

Shubham09

Update app.py

b9fbb26 over 2 years ago

raw

history blame

2.13 kB

	import nltk
	import librosa
	import torch
	import gradio as gr
	from transformers import WhisperProcessor, WhisperForConditionalGeneration, WhisperTokenizer
	nltk.download("punkt")
	from transformers import pipeline



	# Checkpoint identifier shared by the model and processor loaders below.
	model_name = "Shubham09/whisper31filescheck"

	# Whisper sequence-to-sequence model for that checkpoint.
	model = WhisperForConditionalGeneration.from_pretrained(model_name)

	# Matching processor, loaded with the task set to "transcribe".
	processor = WhisperProcessor.from_pretrained(
	    model_name,
	    task="transcribe",
	)

	def load_data(input_file, target_sr=16000):
	    """Load an audio file as a mono waveform sampled at ``target_sr`` Hz.

	    Args:
	        input_file: Audio source handed straight to ``librosa.load``.
	        target_sr: Sampling rate of the returned waveform. Defaults to
	            16000, the rate this function previously hard-coded.

	    Returns:
	        The waveform array produced by ``librosa.load``.
	    """
	    # Let librosa down-mix and resample in a single step. The previous code
	    # loaded at librosa's default rate and then called
	    # ``librosa.resample`` with positional rates (rejected by newer librosa
	    # releases, where they are keyword-only), resampling the audio twice.
	    # Its manual stereo branch never ran because ``librosa.load`` already
	    # returns mono audio by default.
	    speech, _ = librosa.load(input_file, sr=target_sr, mono=True)
	    return speech

	# def correct_casing(input_sentence):

	# sentences = nltk.sent_tokenize(input_sentence)
	# return (' '.join([s.replace(s[0],s[0].capitalize(),1) for s in sentences]))
	# Build the speech-recognition pipeline from the `model` and `processor`
	# objects already loaded earlier in this file, rather than fetching and
	# instantiating the same checkpoint a second time. The task is named
	# explicitly so it does not have to be inferred from the checkpoint.
	pipe = pipeline(
	    task="automatic-speech-recognition",
	    model=model,
	    tokenizer=processor.tokenizer,
	    feature_extractor=processor.feature_extractor,
	)

	def asr_transcript(input_file):
	    """Transcribe an audio file with the module-level ``pipe`` pipeline.

	    Args:
	        input_file: Path of the recording to transcribe. May be ``None`` or
	            empty, because the UI declares its audio input as optional.

	    Returns:
	        The ``"text"`` field of the pipeline result, or an empty string
	        when no audio was supplied.
	    """
	    # Guard clause: submitting the form without a recording must not crash
	    # the pipeline call.
	    if not input_file:
	        return ""
	    return pipe(input_file)["text"]

	# speech = load_data(input_file)
	# #Tokenize
	# input_features = processor(speech).input_features #, padding="longest" , return_tensors="pt"
	# #input_values = tokenizer(speech, return_tensors="pt").input_values
	# #Take logits
	# logits = model(input_features).logits
	# #Take argmax
	# predicted_ids = torch.argmax(logits, dim=-1)
	# #Get the words from predicted word ids
	# transcription = processor.batch_decode(predicted_ids)
	# #Correcting the letter casing
	# #transcription = correct_casing(transcription.lower())
	# return transcription

	# Gradio front end: microphone recording in, transcript text out.
	# The title previously read "Wav2Vec 2.0", but this app loads a Whisper
	# model (WhisperForConditionalGeneration), so the label was misleading.
	# NOTE(review): `gr.inputs` / `gr.outputs`, `optional=True` and
	# `theme="grass"` look like the legacy Gradio API — confirm against the
	# gradio version this Space pins before changing them.
	demo = gr.Interface(
	    asr_transcript,
	    inputs=gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker"),
	    outputs=gr.outputs.Textbox(label="Output Text"),
	    title="ASR using Whisper",
	    description="This application displays transcribed text for given audio input",
	    examples=[["Actuator.wav"], ["anomalies.wav"]],
	    theme="grass",
	)
	demo.launch()