Spaces:

YuAnthony
/

Voice-Recognition

Runtime error

File size: 3,188 Bytes

3be9ff2
 
 
 
 
 
 
 
 
 
 
 
dd65803
 
 
 
 
 
 
 
 
5a5c4db
dd65803
 
 
 
 
 
 
 
 
 
 
5a5c4db
dd65803
 
 
 
 
 
 
 
3be9ff2
dd65803
 
 
 
 
 
3be9ff2
 
 
e0dec14
3be9ff2
 
 
dd65803
 
9f6e983
 
 
bbbc342
 
9f6e983
 
3be9ff2
 
 
785a1fb
90ebfd8
c44ab12
03f95fc
74a0954
3be9ff2
 
 
 
dd65803
3be9ff2
 
 
b6d851d
cf39d9c
3be9ff2

import gradio as gr

import argparse
import functools

import numpy as np
import torch
from infer_contrast import run

from utils.reader import load_audio
from utils.utility import add_arguments, print_arguments


# Bootstrap stylesheet prepended to both result cards; the cards rely on its
# grid ("container"/"row") and colour ("text-success"/"text-danger") classes.
# NOTE(review): the package spec in this URL had been mangled to
# "[email protected]" (an e-mail obfuscation artefact), which cannot resolve.
# It is restored here as bootstrap@5.1.3, believed to be the release that the
# SRI hash below belongs to -- confirm against jsDelivr before deploying.
STYLE = """
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/bootstrap@5.1.3/dist/css/bootstrap.min.css" integrity="sha256-YvdLHPgkqJ8DVUxjjnGVlMMJtNimJ6dYkowFFvp4kKs=" crossorigin="anonymous">
"""

# HTML card shown when the two speakers are judged to be the same person.
# Contains exactly one "{:.1f}" placeholder, filled with the similarity
# expressed as a percentage. The footnote row is now closed with </div>
# (it previously opened a second, never-closed row), so every <div> balances.
OUTPUT_OK = (
    STYLE
    + """
    <div class="container">
        <div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
        <div class="row"><h1 class="display-1 text-success" style="text-align: center">是同一人</h1></div>
        <div class="row"><h1 style="text-align: center">相似度为:</h1></div>
        <div class="row"><h1 class="display-1 text-success" style="text-align: center">{:.1f}%</h1></div>
        <div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small></div>
    </div>
"""
)

# HTML card shown when the two speakers are judged to be different people.
# Same layout and single "{:.1f}" placeholder as OUTPUT_OK, styled with the
# "text-danger" colour class instead.
OUTPUT_FAIL = (
    STYLE
    + """
    <div class="container">
        <div class="row"><h1 style="text-align: center">Speaker1 和 Speaker2</h1></div>
        <div class="row"><h1 class="display-1 text-danger" style="text-align: center">不是同一人</h1></div>
        <div class="row"><h1 style="text-align: center">相似度为:</h1></div>
        <div class="row"><h1 class="text-danger" style="text-align: center">{:.1f}%</h1></div>
        <div class="row"><small style="text-align: center">(相似度为70%以上才能被认为是同一人)</small></div>
    </div>
"""
)

# Minimum score returned by run() for the "same person" card to be shown.
# The result cards quote this cut-off as literal text ("70%"), so the two
# have to be kept in sync by hand.
THRESHOLD = 0.70


def voiceRecognition(audio1, audio2):
    """Score two recordings against each other and render the verdict as HTML.

    Args:
        audio1: First recording, forwarded unchanged to ``run``.
        audio2: Second recording, forwarded unchanged to ``run``.

    Returns:
        ``OUTPUT_OK`` when the score from ``run`` is at least ``THRESHOLD``,
        otherwise ``OUTPUT_FAIL``; in both cases the template is filled with
        the score multiplied by 100 and shown to one decimal place.
    """
    similarity = run(audio1, audio2)
    card = OUTPUT_OK if similarity >= THRESHOLD else OUTPUT_FAIL
    return card.format(similarity * 100)


# Heading and blurb passed to gr.Interface further down.
title = "Voice Recognition"
description = (
    "This voice recognition demo(Chinese Format) is a simple implementation based on ResNet. "
    "It used ArcFace Loss and an open source Chinese voice corpus - zhvoice."
)

# Two upload-sourced audio inputs configured with type="filepath", one per
# speaker; they map positionally onto voiceRecognition(audio1, audio2).
inputs = [
    gr.inputs.Audio(source="upload", type="filepath", label=speaker)
    for speaker in ("Speaker1", "Speaker2")
]

# Unlabelled HTML output component that receives the rendered result card.
output = gr.outputs.HTML(label="")
          
# Footer links passed to gr.Interface as ``article``. The anchors are joined
# with " | " so that no separator is left dangling after the last link.
_article_links = (
    "<a href='https://github.com/yeyupiaoling/VoiceprintRecognition-Pytorch' target='_blank'>💻 Code Repository</a>",
    "<a href='https://github.com/fighting41love/zhvoice' target='_blank'>🎙️ zhvoice Dataset</a>",
)
article = "<p style='text-align: center'>" + " | ".join(_article_links) + "</p>"

# Sample recording pairs offered in the UI; each entry supplies the
# (Speaker1, Speaker2) files for one example row.
_example_pairs = (
    ("samples/李云龙1.wav", "samples/李云龙2.wav"),
    ("samples/马保国1.wav", "samples/马保国2.wav"),
    ("samples/周杰伦1.wav", "samples/周杰伦2.wav"),
    ("samples/海绵宝宝1.wav", "samples/派大星.wav"),
    ("samples/海绵宝宝1.wav", "samples/海绵宝宝2.wav"),
    ("samples/周星驰.wav", "samples/吴孟达.wav"),
)
# Converted to a list of two-element lists, the shape the original literal had.
examples = [list(pair) for pair in _example_pairs]

# Keyword arguments for the demo UI, collected in one place: the comparison
# callback, its input/output components, and the page text and examples.
_interface_options = {
    "fn": voiceRecognition,
    "inputs": inputs,
    "outputs": output,
    "title": title,
    "description": description,
    "examples": examples,
    "examples_per_page": 3,
    "article": article,
    "enable_queue": True,
}
interface = gr.Interface(**_interface_options)

# Launched at module level, so running or importing this file starts the app.
interface.launch(debug=True, share=True)