nithinraok committed on
Commit
8ed98a1
·
1 Parent(s): 6efc48a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +83 -0
app.py CHANGED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from nemo.collections.asr.models import EncDecSpeakerLabelModel
4
+
5
+
6
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
7
+
8
+ STYLE = """
9
+ <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" integrity="sha256-YvdLHPgkqJ8DVUxjjnGVlMMJtNimJ6dYkowFFvp4kKs=" crossorigin="anonymous">
10
+ """
11
+ OUTPUT_OK = (
12
+ STYLE
13
+ + """
14
+ <div class="container">
15
+ <div class="row"><h1 style="text-align: center">The provided samples are</h1></div>
16
+ <div class="row"><h1 class="text-success" style="text-align: center">Same Speakers!!!</h1></div>
17
+ </div>
18
+ """
19
+ )
20
+ OUTPUT_FAIL = (
21
+ STYLE
22
+ + """
23
+ <div class="container">
24
+ <div class="row"><h1 style="text-align: center">The provided samples are from </h1></div>
25
+ <div class="row"><h1 class="text-danger" style="text-align: center">Different Speakers!!!</h1></div>
26
+ </div>
27
+ """
28
+ )
29
+
30
+ THRESHOLD = 0.80
31
+
32
+ model_name = "nvidia/speakerverification_en_titanet_large"
33
+ model = EncDecSpeakerLabelModel.from_pretrained(model_name).to(device)
34
+
35
+
36
+ def compare_samples(path1, path2):
37
+ if not (path1 and path2):
38
+ return '<b style="color:red">ERROR: Please record audio for *both* speakers!</b>'
39
+
40
+ output = model.verify_speakers(path1,path2,THRESHOLD)
41
+
42
+ return OUTPUT_OK if output else OUTPUT_FAIL
43
+
44
+
45
+ inputs = [
46
+ gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker #1"),
47
+ gr.inputs.Audio(source="microphone", type="filepath", optional=True, label="Speaker #2"),
48
+ ]
49
+ output = gr.outputs.HTML(label="")
50
+
51
+
52
+ description = (
53
+ "This demonstration will analyze two recordings of speech and ascertain whether they have been spoken by the same individual.\n"
54
+ "You can attempt this exercise using your own voice."
55
+ )
56
+ article = (
57
+ "<p style='text-align: center'>"
58
+ "<a href='https://huggingface.co/nvidia/speakerverification_en_titanet_large' target='_blank'>🎙️ Learn more about TitaNet model</a> | "
59
+ "<a href='https://arxiv.org/pdf/2110.04410.pdf' target='_blank'>📚 TitaNet paper</a> | "
60
+ "<a href='https://github.com/NVIDIA/NeMo' target='_blank'>🧑‍💻 Repository</a>"
61
+ "</p>"
62
+ )
63
+ examples = [
64
+ ["data/id10270_5r0dWxy17C8-00001.wav", "data/id10270_5r0dWxy17C8-00002.wav"],
65
+ ["data/id10271_1gtz-CUIygI-00001.wav", "data/id10271_1gtz-CUIygI-00002.wav"],
66
+ ["data/id10270_5r0dWxy17C8-00001.wav", "data/id10271_1gtz-CUIygI-00001.wav"],
67
+ ["data/id10270_5r0dWxy17C8-00002.wav", "data/id10271_1gtz-CUIygI-00002.wav"],
68
+ ]
69
+
70
+ interface = gr.Interface(
71
+ fn=compare_samples,
72
+ inputs=inputs,
73
+ outputs=output,
74
+ title="Speaker Verification with TitaNet Embeddings",
75
+ description=description,
76
+ article=article,
77
+ layout="horizontal",
78
+ theme="huggingface",
79
+ allow_flagging=False,
80
+ live=False,
81
+ examples=examples,
82
+ )
83
+ interface.launch(enable_queue=True)