File size: 1,278 Bytes
f213555
 
 
 
 
 
 
 
ac01485
f213555
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import gradio as gr
import librosa
import matplotlib.pyplot as plt

from train import ASR_Model 
from model_cnn import Model

# Lazily created ASR_Model shared by every call to pre(); see _get_model().
_asr_model = None


def _get_model():
    """Return the shared ``ASR_Model``, constructing it on first use.

    Building the model reads ``model.pth`` and ``pinyin.txt``; doing that
    once instead of per request avoids repeating the load for every
    prediction.
    NOTE(review): assumes ``ASR_Model.predict`` can safely be called
    repeatedly on the same instance -- confirm against ``train.py``.
    """
    global _asr_model
    if _asr_model is None:
        _asr_model = ASR_Model(device='cpu', model_path='model.pth',
                               pinyin_path='pinyin.txt')
    return _asr_model


def pre(audio):
    """Run the ASR model on ``audio`` and format its result as one string.

    Args:
        audio: Value passed straight through to ``ASR_Model.predict``
            (in this file it is whatever the Gradio file input provides).

    Returns:
        A string in which each item of the prediction contributes
        ``item[0]`` followed by ``str(item[1])`` and a single space, so a
        non-empty result ends with a trailing space and an empty result
        yields ``''``.
        NOTE(review): items are presumably (pinyin, tone) pairs -- confirm
        against ``ASR_Model.predict``.
    """
    result = _get_model().predict(audio)
    # Trailing space after every item is kept on purpose so the output is
    # byte-identical to the previous implementation.
    return ''.join(r[0] + str(r[1]) + ' ' for r in result)

def visualize(audio):
    """Plot the waveform of an audio file and run tone evaluation on it.

    Args:
        audio: Audio source handed to ``librosa.load`` and then to ``pre``
            (in this file it is whatever the Gradio file input provides).

    Returns:
        A ``(image_path, text)`` tuple: the path of the PNG waveform image
        written by this call, and the string returned by ``pre(audio)``.
    """
    # Import the submodule explicitly: a bare ``import librosa`` does not
    # expose ``librosa.display`` on every librosa release.
    import librosa.display

    # sr=None asks librosa to keep the file's native sampling rate.
    y, sr = librosa.load(audio, sr=None)

    image_path = "./waveform.png"

    # Draw on an explicit figure/axes instead of pyplot's implicit "current
    # figure", and always close it so a failure while plotting or saving
    # does not leave the figure registered with pyplot.
    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        librosa.display.waveshow(y, sr=sr, ax=ax)
        ax.set_title("Waveform of the Audio")
        ax.set_xlabel("Time (s)")
        ax.set_ylabel("Amplitude")
        fig.savefig(image_path, format='png')
    finally:
        plt.close(fig)

    return image_path, pre(audio)

#e = gr.Examples(examples=['./SSB10500228.wav'], inputs=[gr.File(type="filepath")])

# Input widget: a file picker restricted to .wav uploads.
wav_input = gr.File(label="wav file", file_types=['.wav'])

# Output widgets, in the order visualize() returns its values:
# the waveform image path first, then the evaluation text.
result_outputs = [
    gr.Image(type="filepath", label="Waveform"),
    gr.Textbox(type="text", label="Tone Evaluation Result"),
]

# Sample recordings offered as examples in the UI.
example_files = [
    "Examples/中原石化加油站.wav",
    "Examples/你叫什么名字你的名字.wav",
    "Examples/来一首许多年以后.wav",
]

demo = gr.Interface(
    fn=visualize,
    inputs=wav_input,
    outputs=result_outputs,
    examples=example_files,
    title="Mandarin Tone Evaluation",
)

# Start the web app as soon as the module is executed.
demo.launch()