Spaces:

mizoru
/

Japanese_pitch

Runtime error

File size: 1,408 Bytes

30f32d0
 
 
 
ed060a2
 
6d84a1e
d13cfa6
42031f0
 
 
 
 
a913d8c
30f32d0
 
 
1654da2
c67e62e
1654da2
dfa07fa
2ac0421
84101d2
2693dff
84101d2
30f32d0
 
 
 
2ac0421
30f32d0
2231179
30f32d0
052cb7b
30f32d0
 
 
8a6918f

import gradio as gr

from fastai.vision.all import *

from fastaudio.core.all import *

# Set the default figure resolution for every matplotlib figure created below.
matplotlib.rcParams.update({'figure.dpi': 300})

def get_x(df):
    """Return the ``path`` attribute of ``df``.

    NOTE(review): not called anywhere in this file; presumably it must exist
    under this name so the pickled learner can be loaded -- confirm.
    """
    audio_path = df.path
    return audio_path
def get_y(df):
    """Return the ``pattern`` attribute of ``df``.

    NOTE(review): not called anywhere in this file; presumably it must exist
    under this name so the pickled learner can be loaded -- confirm.
    """
    accent_pattern = df.pattern
    return accent_pattern
   
# Load the exported learner once at import time so every request reuses it.
# cpu=True is load_learner's default and is spelled out here for clarity.
learn = load_learner('xresnet50_pitch3.pkl', cpu=True)

# Class vocabulary of the learner's DataLoaders; used to label the
# probabilities returned by predict().
labels = learn.dls.vocab

def predict(Record, Upload):
    """Classify the pitch accent pattern of a single audio clip.

    An uploaded file takes precedence over a microphone recording when both
    are supplied.

    Args:
        Record: File path of the microphone recording, or a falsy value
            (e.g. ``None``) when nothing was recorded.
        Upload: File path of the uploaded audio file, or a falsy value when
            nothing was uploaded.

    Returns:
        A two-item list: a dict mapping every entry of ``labels`` to its
        predicted probability as a ``float``, and the matplotlib figure on
        which the model input returned by ``learn.predict`` is drawn.

    Raises:
        ValueError: If neither ``Record`` nor ``Upload`` is provided.
    """
    path = Upload or Record
    if not path:
        # Without this guard str(None) == 'None' would be handed to the
        # learner and surface as an obscure failure further down.
        raise ValueError("No audio provided: record a clip or upload a file.")

    # with_input=True makes predict() also return the processed model input,
    # which is what gets plotted below.
    spec, _pred, _pred_idx, probs = learn.predict(str(path), with_input=True)

    fig, ax = plt.subplots(figsize=(16, 10))
    show_image(spec, ax=ax)
    # Flip the vertical axis of the displayed image (presumably so that low
    # frequencies end up at the bottom -- confirm).
    ax.invert_yaxis()

    confidences = {label: float(prob) for label, prob in zip(labels, probs)}
    return [confidences, fig]


# Text shown on the demo page.
title = "Japanese Pitch Accent Pattern Detector"

description = "This model will predict the pitch accent pattern of a word based on the recording of its pronunciation."

# The link needs an explicit scheme: without "https://" the browser treats the
# href as a path relative to the current page and the link is broken.
article = "<p style='text-align: center'><a href='https://mizoru.github.io/blog' target='_blank'>Blog</a></p>"

# Example rows offered in the UI; each row holds one audio file name.
examples = [['代わる.mp3'], ['大丈夫な.mp3'], ['熱くない.mp3'], ['あめー雨.mp3'], ['あめー飴.mp3']]

enable_queue = True

# Two optional audio inputs (microphone and file upload) feed predict(), which
# returns the label confidences and the plotted figure.
gr.Interface(
    fn=predict,
    inputs=[
        gr.inputs.Audio(source='microphone', type='filepath', optional=True),
        gr.inputs.Audio(source='upload', type='filepath', optional=True),
    ],
    outputs=[
        gr.outputs.Label(num_top_classes=3),
        gr.outputs.Image(type="plot", label='Spectrogram'),
    ],
    title=title,
    description=description,
    article=article,
    examples=examples,
).launch(debug=True, share=True, enable_queue=enable_queue)