import gradio as gr
import torch
from nemo.collections.asr.models import EncDecSpeakerLabelModel
# Use CUDA when torch reports it as available; otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
# Common prefix for both result messages; at present it contains only a newline.
STYLE = """
"""

# Result text that compare_samples returns when verification succeeds.
OUTPUT_OK = STYLE + """
The provided samples are
Same Speakers!!!
"""

# Result text that compare_samples returns when verification fails.
OUTPUT_FAIL = STYLE + """
The provided samples are from
Different Speakers!!!
"""

# Value passed as the third argument to model.verify_speakers.
THRESHOLD = 0.80
# Name of the pretrained checkpoint handed to EncDecSpeakerLabelModel.from_pretrained.
model_name = "nvidia/speakerverification_en_titanet_large"

# Load the checkpoint once at import time, then move it onto the selected device.
model = EncDecSpeakerLabelModel.from_pretrained(model_name)
model = model.to(device)
def compare_samples(path1, path2):
    """Report whether two recordings are judged to come from the same speaker.

    Args:
        path1: File path of the first recording; a falsy value means missing.
        path2: File path of the second recording; a falsy value means missing.

    Returns:
        An error string when either path is missing. Otherwise ``OUTPUT_OK``
        when ``model.verify_speakers`` returns a truthy result for the two
        paths and ``THRESHOLD``, else ``OUTPUT_FAIL``.
    """
    # Guard clause: both recordings are required before the model is called.
    if not path1 or not path2:
        return 'ERROR: Please record audio for *both* speakers!'

    same_speaker = model.verify_speakers(path1, path2, THRESHOLD)
    if same_speaker:
        return OUTPUT_OK
    return OUTPUT_FAIL
# Two microphone Audio inputs configured with type="filepath", one per speaker label.
inputs = [
    gr.inputs.Audio(source="microphone", type="filepath", optional=True, label=speaker_label)
    for speaker_label in ("Speaker #1", "Speaker #2")
]

# HTML output component with an empty label.
output = gr.outputs.HTML(label="")
# Two-sentence blurb about the demo, one sentence per line.
description = "\n".join(
    [
        "This demonstration will analyze two recordings of speech and ascertain whether they have been spoken by the same individual.",
        "You can attempt this exercise using your own voice.",
    ]
)
article = (
"