Spaces:

Neilblaze
/

WhisperAnything

Sleeping

App Files Files Community

WhisperAnything / app.py

Neilblaze

Playing around xD

ee5ce46 unverified over 1 year ago

raw

history blame

1.99 kB

	from multilingual_translation import text_to_text_generation
	from utils import lang_ids, data_scraping
	import whisper
	import gradio as gr

	# Module-level setup, executed once when the script is imported/run.
	# Names of all languages known to utils.lang_ids (its keys, as a list).
	# Not referenced again in the visible part of this file.
	lang_list = list(lang_ids.keys())
	# Whatever utils.data_scraping() returns; presumably the available
	# translation model ids -- TODO confirm against utils. Not referenced again
	# in the visible part of this file.
	model_list = data_scraping()
	# Load the "small" Whisper checkpoint once so transcribe() can reuse it on
	# every call instead of reloading it per request.
	model = whisper.load_model("small")

	def transcribe(audio):
	    """Transcribe an audio clip with Whisper and translate the text to English.

	    Args:
	        audio: Path to the recorded audio file (the Gradio Audio input in
	            this file is configured with type="filepath"). May be None or
	            empty when no recording is available.

	    Returns:
	        The value returned by text_to_text_generation for the transcribed
	        text (presumably the English translation as a string -- confirm
	        against multilingual_translation). An empty string is returned
	        when no audio was supplied.

	    Note:
	        The clip is padded/trimmed to 30 seconds before decoding, so only
	        the first 30 seconds of a longer recording are transcribed.
	    """
	    # Nothing recorded yet (or the input was cleared): return an empty
	    # result instead of failing inside whisper.load_audio.
	    if not audio:
	        return ""

	    # Load the audio and pad/trim it to fit 30 seconds.
	    waveform = whisper.pad_or_trim(whisper.load_audio(audio))

	    # Make the log-Mel spectrogram and move it to the same device as the model.
	    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

	    # Detect the spoken language (logged for debugging only).
	    _, probs = model.detect_language(mel)
	    print(f"Detected language: {max(probs, key=probs.get)}")

	    # Decode the audio; fp16 is disabled because inference runs on CPU.
	    options = whisper.DecodingOptions(fp16=False)
	    result = whisper.decode(model, mel, options)

	    # Translate the actual transcription. The previous code passed the
	    # literal string 'return.text', so the recognised speech was never used.
	    translation = text_to_text_generation(
	        prompt=result.text,
	        model_id='facebook/m2m100_418M',
	        device='cpu',
	        target_lang='English',
	    )
	    return translation

	# api endpoint to return the transcription in EN as a json response

	# @app.route('/transcribe', methods=['POST'])
	# def transcribe_api():
	# if request.method == 'POST':
	# audio = request.files['audio']
	# audio = audio.read()
	# audio = io.BytesIO(audio)
	# audio = whisper.load_audio(audio)
	# audio = whisper.pad_or_trim(audio)
	# mel = whisper.log_mel_spectrogram(audio).to(model.device)
	# _, probs = model.detect_language(mel)
	# print(f"Detected language: {max(probs, key=probs.get)}")
	# options = whisper.DecodingOptions(fp16 = False)
	# result = whisper.decode(model, mel, options)
	# return jsonify(result)





	# Web UI: a single microphone input (delivered to transcribe() as a file
	# path) wired to one text box, with Gradio's live mode switched on.
	_microphone = gr.inputs.Audio(source="microphone", type="filepath")
	_demo = gr.Interface(
	    fn=transcribe,
	    inputs=[_microphone],
	    outputs=["textbox"],
	    title='OpenAI Whisper ASR Gradio Web UI',
	    live=True,
	)
	# Start the server with debug output and request queuing enabled.
	_demo.launch(debug=True, enable_queue=True)

	# output = gr.outputs.Textbox(label="Output Text")