Spaces:

space-sue
/

hf-speech-eval

Sleeping

App Files Files Community

hf-speech-eval / app.py

space-sue

Update app.py

8f0642b almost 2 years ago

raw

history blame contribute delete

2.65 kB


	import gradio as gr
	import torch.cuda
	import whisper
	from whisper.tokenizer import LANGUAGES
	from vid_to_wav import extract_audio
	# True when a CUDA device is available; transcribe() passes this as fp16=.
	gpu = torch.cuda.is_available()
	# Whisper model handle; assigned by get_interface() and used by transcribe().
	model = None


	def analyze_transcription(text, duration):
	    """Summarize the speaking pace of a transcription.

	    Args:
	        text: The transcribed speech; words are counted by whitespace split.
	        duration: Length of the recording in seconds.

	    Returns:
	        A human-readable string giving the duration, the word count, the
	        speaking rate in words per minute and a verdict on that rate
	        (slower than 130 wpm, faster than 150 wpm, or normal). When the
	        duration is missing, zero or negative the rate cannot be computed
	        and the string says so instead of raising ZeroDivisionError.
	    """
	    slow_below_wpm = 130
	    fast_above_wpm = 150

	    word_count = len(text.split())
	    sentences = [
	        "The video is {} sec. long and the speaker speaks {} words.".format(
	            duration, word_count)
	    ]

	    # A zero-length (or invalid) clip has no meaningful speaking rate.
	    if not duration or duration < 0:
	        sentences.append(
	            "The speech speed cannot be computed because the duration is not positive.")
	        return " ".join(sentences)

	    duration_in_min = duration / 60
	    words_per_min = round(word_count / duration_in_min)
	    sentences.append(
	        "The speech speed is {} words-per-minute.".format(words_per_min))

	    if words_per_min < slow_below_wpm:
	        sentences.append("The speaker has spoken slower than average speakers.")
	    elif words_per_min > fast_above_wpm:
	        sentences.append("The speaker has spoken faster than average speakers.")
	    else:
	        sentences.append(
	            "The speaker maintains normal speed during speech making the speech comprehensible to most audiences!")

	    # Join with single spaces so the sentences no longer run together.
	    return " ".join(sentences)


	def transcribe(filepath, language, task):
	    """Transcribe or translate the speech in a video and analyze its pace.

	    Args:
	        filepath: Path of the uploaded video, handed to extract_audio().
	        language: Language name chosen in the UI, or "Detect" to pass
	            language=None to the model.
	        task: "Transcribe" or "Translate"; lower-cased before being passed
	            to the model.

	    Returns:
	        A (text, analysis) tuple: the stripped transcription text and the
	        string produced by analyze_transcription() for it.

	    Raises:
	        RuntimeError: If the module-level model has not been loaded yet.
	    """
	    if model is None:
	        # Fail with a clear message instead of an AttributeError on None.
	        raise RuntimeError(
	            "Whisper model is not loaded; call get_interface() first.")

	    print(filepath)
	    # Only the audio file and the duration are used; the first value
	    # returned by extract_audio() is not needed here.
	    _, audio_file, duration = extract_audio(filepath)

	    if language == "Detect":
	        language = None

	    result = model.transcribe(
	        audio_file, task=task.lower(), language=language, fp16=gpu,
	    )
	    text = result["text"].strip()
	    return text, analyze_transcription(text, duration)


	def get_interface(model_name="medium"):
	    """Load a Whisper model and build the Gradio interface around transcribe().

	    Args:
	        model_name: Name passed to whisper.load_model(); the loaded model is
	            stored in the module-level ``model`` used by transcribe().

	    Returns:
	        A gr.Interface taking a video, a language and a task as inputs and
	        showing the transcription and the speech analysis as outputs.
	    """
	    global model
	    model = whisper.load_model(model_name)

	    # Inputs, created in the same order they are shown in the UI.
	    video_input = gr.Video(label="Upload", source="upload", type="filepath")
	    language_names = sorted(name.title() for name in LANGUAGES.values())
	    language_input = gr.Dropdown(
	        label="Language",
	        choices=["Detect"] + language_names,
	        value="Detect",
	    )
	    task_input = gr.Dropdown(
	        label="Task",
	        choices=["Transcribe", "Translate"],
	        value="Transcribe",
	        info="Whether to perform X->X speech recognition or X->English translation",
	    )

	    # Outputs: full transcription first, then the pace analysis.
	    transcription_box = gr.Textbox(label="Transcription", lines=26)
	    analysis_box = gr.Textbox(label="Speech Analysis", lines=4)

	    glass_theme = gr.themes.Glass(
	        primary_hue=gr.themes.colors.orange,
	        secondary_hue=gr.themes.colors.purple,
	    )

	    return gr.Interface(
	        fn=transcribe,
	        inputs=[video_input, language_input, task_input],
	        outputs=[transcription_box, analysis_box],
	        theme=glass_theme,
	        title="Analysis of Speech from Video",
	        allow_flagging="never",
	    )


	# Build the interface at import time; this also loads the default
	# ("medium") Whisper model into the module-level `model`.
	demo = get_interface()
	# Turn on Gradio's queue and launch the app with debug=True.
	demo.queue().launch(debug=True)