File size: 3,188 Bytes
3be9ff2
 
 
 
 
 
 
 
 
 
 
 
dd65803
 
 
 
 
 
 
 
 
5a5c4db
dd65803
 
 
 
 
 
 
 
 
 
 
5a5c4db
dd65803
 
 
 
 
 
 
 
3be9ff2
dd65803
 
 
 
 
 
3be9ff2
 
 
e0dec14
3be9ff2
 
 
dd65803
 
9f6e983
 
 
bbbc342
 
9f6e983
 
3be9ff2
 
 
785a1fb
90ebfd8
c44ab12
03f95fc
74a0954
3be9ff2
 
 
 
dd65803
3be9ff2
 
 
b6d851d
cf39d9c
3be9ff2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import gradio as gr

import argparse
import functools

import numpy as np
import torch
from infer_contrast import run

from utils.reader import load_audio
from utils.utility import add_arguments, print_arguments


# Bootstrap stylesheet tag that is prepended to both result templates below.
# The package spec in the URL had been mangled into an e-mail-obfuscation
# placeholder, which made the stylesheet unreachable; it is restored here.
# NOTE(review): 5.1.3 is the release believed to match the integrity hash --
# confirm, because a mismatch makes the browser reject the stylesheet.
STYLE = """
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" integrity="sha256-YvdLHPgkqJ8DVUxjjnGVlMMJtNimJ6dYkowFFvp4kKs=" crossorigin="anonymous">
"""

# HTML shown when the two recordings are judged to be the same speaker.
# Contains one positional ``str.format`` field (``{:.1f}``) that receives the
# similarity expressed as a percentage.
OUTPUT_OK = (
    STYLE
    + """
    <div class="container">
        <div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
        <div class="row"><h1 class="display-1 text-success" style="text-align: center">是同一人</h1></div>
        <div class="row"><h1 style="text-align: center">相似度为:</h1></div>
        <div class="row"><h1 class="display-1 text-success" style="text-align: center">{:.1f}%</h1></div>
        <div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small></div>
    </div>
"""
)

# HTML shown when the two recordings are judged to be different speakers.
# Same single ``{:.1f}`` percentage field as OUTPUT_OK.
OUTPUT_FAIL = (
    STYLE
    + """
    <div class="container">
        <div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
        <div class="row"><h1 class="display-1 text-danger" style="text-align: center">不是同一人</h1></div>
        <div class="row"><h1 style="text-align: center">相似度为:</h1></div>
        <div class="row"><h1 class="text-danger" style="text-align: center">{:.1f}%</h1></div>
        <div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small></div>
    </div>
"""
)

# Minimum score (inclusive) at which two recordings count as the same speaker.
THRESHOLD = 0.70


def voiceRecognition(audio1, audio2):
    """Compare two recordings and render the verdict as an HTML snippet.

    Both arguments are forwarded unchanged to ``run`` (imported from
    ``infer_contrast``); the interface below supplies them as file paths.

    Args:
        audio1: First recording to compare.
        audio2: Second recording to compare.

    Returns:
        ``OUTPUT_OK`` when the score is at least ``THRESHOLD``, otherwise
        ``OUTPUT_FAIL``, formatted with the score multiplied by 100.
    """
    # Presumably a similarity in the 0..1 range -- verify against `run`.
    similarity = run(audio1, audio2)
    template = OUTPUT_OK if similarity >= THRESHOLD else OUTPUT_FAIL
    return template.format(similarity * 100)


# ---------------------------------------------------------------------------
# Gradio demo wiring: page texts, input/output widgets, bundled examples,
# and finally the interface construction and launch.
# ---------------------------------------------------------------------------

# Heading and introductory text passed to the interface.
title = "Voice Recognition"
description = (
    "This voice recognition demo(Chinese Format) is a simple implementation based on ResNet. "
    "It used ArcFace Loss and an open source Chinese voice corpus - zhvoice."
)

# Two upload widgets, one per speaker; each delivers a file path to the callback.
inputs = [
    gr.inputs.Audio(source="upload", type="filepath", label=speaker_label)
    for speaker_label in ("Speaker1", "Speaker2")
]

# The callback returns raw HTML, so the result is shown in an unlabeled HTML pane.
output = gr.outputs.HTML(label="")

# Footer with links to the code repository and the dataset.
article = "".join(
    [
        "<p style='text-align: center'>",
        "<a href='https://github.com/yeyupiaoling/VoiceprintRecognition-Pytorch' target='_blank'>💻 Code Repository</a> | ",
        "<a href='https://github.com/fighting41love/zhvoice' target='_blank'>🎙️ zhvoice Dataset</a> | ",
        "</p>",
    ]
)

# Bundled sample pairs; each inner list fills (Speaker1, Speaker2).
examples = [
    ["samples/李云龙1.wav", "samples/李云龙2.wav"],
    ["samples/马保国1.wav", "samples/马保国2.wav"],
    ["samples/周杰伦1.wav", "samples/周杰伦2.wav"],
    ["samples/海绵宝宝1.wav", "samples/派大星.wav"],
    ["samples/海绵宝宝1.wav", "samples/海绵宝宝2.wav"],
    ["samples/周星驰.wav", "samples/吴孟达.wav"],
]

# Collect the constructor arguments in one place, then build the interface.
_interface_options = {
    "fn": voiceRecognition,
    "inputs": inputs,
    "outputs": output,
    "title": title,
    "description": description,
    "examples": examples,
    "examples_per_page": 3,
    "article": article,
    "enable_queue": True,
}
interface = gr.Interface(**_interface_options)

# Start the demo as soon as this module is executed.
interface.launch(debug=True, share=True)