|
import gradio as gr |
|
import torch |
|
from nemo.collections.asr.models import EncDecSpeakerLabelModel |
|
|
|
|
|
# Run inference on GPU when one is available; otherwise fall back to CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Bootstrap CSS injected ahead of the verdict markup so the result banners
# can use Bootstrap's grid rows and text-success / text-danger classes.
STYLE = """
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" integrity="sha256-YvdLHPgkqJ8DVUxjjnGVlMMJtNimJ6dYkowFFvp4kKs=" crossorigin="anonymous">
"""
# HTML fragment returned when the model judges both clips to be the same speaker.
OUTPUT_OK = (
    STYLE
    + """
<div class="container">
<div class="row"><h1 style="text-align: center">The provided samples are</h1></div>
<div class="row"><h1 class="text-success" style="text-align: center">Same Speakers!!!</h1></div>
</div>
"""
)
# HTML fragment returned when the model judges the clips to be different speakers.
OUTPUT_FAIL = (
    STYLE
    + """
<div class="container">
<div class="row"><h1 style="text-align: center">The provided samples are from </h1></div>
<div class="row"><h1 class="text-danger" style="text-align: center">Different Speakers!!!</h1></div>
</div>
"""
)

# Decision threshold forwarded to model.verify_speakers below.
# NOTE(review): presumably a similarity cutoff in [0, 1] — confirm against
# NeMo's EncDecSpeakerLabelModel.verify_speakers documentation.
THRESHOLD = 0.80

# Pretrained TitaNet-large speaker-verification checkpoint. from_pretrained
# downloads the weights on first run (network access at import time); the
# model is then moved to the device chosen above.
model_name = "nvidia/speakerverification_en_titanet_large"
model = EncDecSpeakerLabelModel.from_pretrained(model_name).to(device)
|
|
|
|
|
def compare_samples(path1, path2):
    """Decide whether two recordings come from the same speaker.

    Args:
        path1: Filesystem path of the first recording, or None/"" when the
            user submitted nothing.
        path2: Filesystem path of the second recording, likewise optional.

    Returns:
        An HTML snippet: an inline error banner when either recording is
        missing, otherwise the module-level OUTPUT_OK / OUTPUT_FAIL fragment
        according to the model's verdict at THRESHOLD.
    """
    # Guard clause: both recordings must exist before the model is invoked.
    if not path1 or not path2:
        return '<b style="color:red">ERROR: Please record audio for *both* speakers!</b>'

    # Truthy result means the model considers the two clips the same speaker.
    same_speaker = model.verify_speakers(path1, path2, THRESHOLD)
    if same_speaker:
        return OUTPUT_OK
    return OUTPUT_FAIL
|
|
|
|
|
# Two microphone recordings; type="filepath" hands compare_samples temp-file
# paths, and optional=True lets Gradio pass None so the function can render
# its own error banner instead of crashing.
# NOTE(review): gr.inputs / gr.outputs is the legacy (pre-3.0) Gradio
# namespace — this script needs an old gradio pin; newer releases removed it
# in favor of gr.Audio(sources=["microphone"], ...). Confirm the pinned
# version before migrating.
inputs = [
    gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker #1"),
    gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker #2"),
]
# Raw-HTML output so the Bootstrap-styled verdict fragments render as-is.
output = gr.outputs.HTML(label="")

# Copy shown under the demo title.
description = (
    "This demonstration will analyze two recordings of speech and ascertain whether they have been spoken by the same individual.\n"
    "You can attempt this exercise using your own voice."
)
# Footer links: model card, paper, and source repository.
# NOTE(review): the link labels ("ποΈ", "π", "π§βπ»") look like mojibake of
# emoji — likely an encoding issue upstream; left byte-identical here.
article = (
    "<p style='text-align: center'>"
    "<a href='https://huggingface.co/nvidia/speakerverification_en_titanet_large' target='_blank'>ποΈ Learn more about TitaNet model</a> | "
    "<a href='https://arxiv.org/pdf/2110.04410.pdf' target='_blank'>π TitaNet paper</a> | "
    "<a href='https://github.com/NVIDIA/NeMo' target='_blank'>π§βπ» Repository</a>"
    "</p>"
)
# Bundled sample pairs (filenames suggest VoxCeleb utterance IDs — id10270 /
# id10271): rows 1-2 pair clips of the same speaker, rows 3-4 mix speakers.
examples = [
    ["data/id10270_5r0dWxy17C8-00001.wav", "data/id10270_5r0dWxy17C8-00002.wav"],
    ["data/id10271_1gtz-CUIygI-00001.wav", "data/id10271_1gtz-CUIygI-00002.wav"],
    ["data/id10270_5r0dWxy17C8-00001.wav", "data/id10271_1gtz-CUIygI-00001.wav"],
    ["data/id10270_5r0dWxy17C8-00002.wav", "data/id10271_1gtz-CUIygI-00002.wav"],
]
|
|
|
# Wire the verification function into a two-input Gradio demo.
# NOTE(review): layout=, theme= (as a string), allow_flagging=False and
# launch(enable_queue=...) are legacy pre-3.x Interface/launch arguments;
# they match the gr.inputs/gr.outputs usage above and would need updating
# together if gradio is ever upgraded.
interface = gr.Interface(
    fn=compare_samples,
    inputs=inputs,
    outputs=output,
    title="Speaker Verification with TitaNet Embeddings",
    description=description,
    article=article,
    layout="horizontal",
    theme="huggingface",
    allow_flagging=False,  # hide the flagging button entirely
    live=False,  # run only on explicit submit, not on every input change
    examples=examples,
)
# Queue requests so concurrent users don't hit the single model instance at once.
interface.launch(enable_queue=True)