Spaces:

nithinraok
/

titanet-speaker-verification

Running

App Files Files Community

nithinraok committed on Mar 13, 2023

Commit

8ed98a1

1 Parent(s): 6efc48a

Update app.py

Browse files

Files changed (1) hide show

app.py +83 -0

app.py CHANGED Viewed

	@@ -0,0 +1,83 @@

+import gradio as gr
+import torch
+from nemo.collections.asr.models import EncDecSpeakerLabelModel
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+STYLE = """
+<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" integrity="sha256-YvdLHPgkqJ8DVUxjjnGVlMMJtNimJ6dYkowFFvp4kKs=" crossorigin="anonymous">
+"""
+OUTPUT_OK = (
+    STYLE
+    + """
+    <div class="container">
+        <div class="row"><h1 style="text-align: center">The provided samples are</h1></div>
+        <div class="row"><h1 class="text-success" style="text-align: center">Same Speakers!!!</h1></div>
+    </div>
+"""
+)
+OUTPUT_FAIL = (
+    STYLE
+    + """
+    <div class="container">
+        <div class="row"><h1 style="text-align: center">The provided samples are from </h1></div>
+        <div class="row"><h1 class="text-danger" style="text-align: center">Different Speakers!!!</h1></div>
+    </div>
+"""
+)
+THRESHOLD = 0.80
+model_name = "nvidia/speakerverification_en_titanet_large"
+model = EncDecSpeakerLabelModel.from_pretrained(model_name).to(device)
+def compare_samples(path1, path2):
+    if not (path1 and path2):
+        return '<b style="color:red">ERROR: Please record audio for *both* speakers!</b>'
+    output = model.verify_speakers(path1,path2,THRESHOLD)
+    return OUTPUT_OK if output else OUTPUT_FAIL
+inputs = [
+    gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker #1"),
+    gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker #2"),
+]
+output = gr.outputs.HTML(label="")
+description = (
+    "This demonstration will analyze two recordings of speech and ascertain whether they have been spoken by the same individual.\n"
+    "You can attempt this exercise using your own voice."
+)
+article = (
+    "<p style='text-align: center'>"
+    "<a href='https://huggingface.co/nvidia/speakerverification_en_titanet_large' target='_blank'>🎙️ Learn more about TitaNet model</a> | "
+    "<a href='https://arxiv.org/pdf/2110.04410.pdf' target='_blank'>📚 TitaNet paper</a> | "
+    "<a href='https://github.com/NVIDIA/NeMo' target='_blank'>🧑‍💻 Repository</a>"
+    "</p>"
+)
+examples = [
+    ["data/id10270_5r0dWxy17C8-00001.wav", "data/id10270_5r0dWxy17C8-00002.wav"],
+    ["data/id10271_1gtz-CUIygI-00001.wav", "data/id10271_1gtz-CUIygI-00002.wav"],
+    ["data/id10270_5r0dWxy17C8-00001.wav", "data/id10271_1gtz-CUIygI-00001.wav"],
+    ["data/id10270_5r0dWxy17C8-00002.wav", "data/id10271_1gtz-CUIygI-00002.wav"],
+]
+interface = gr.Interface(
+    fn=compare_samples,
+    inputs=inputs,
+    outputs=output,
+    title="Speaker Verification with TitaNet Embeddings",
+    description=description,
+    article=article,
+    layout="horizontal",
+    theme="huggingface",
+    allow_flagging=False,
+    live=False,
+    examples=examples,
+)
+interface.launch(enable_queue=True)