File size: 4,166 Bytes
030ab41
 
 
 
 
 
73e8673
030ab41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4e772c
030ab41
 
c4e772c
 
 
 
 
 
 
 
 
030ab41
c4e772c
 
 
 
030ab41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c4e772c
030ab41
 
 
 
 
 
 
 
 
 
 
c4e772c
030ab41
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import html
import json
import os
import posixpath
from urllib.parse import urlsplit

import gradio as gr
import requests

def send_req(title, audio_file_path):
    """Post a reference text and an audio file to the scoring endpoint.

    Args:
        title: Reference text, sent as the ``title`` form field.
        audio_file_path: Path of the audio file to upload. It is sent as the
            ``audio`` multipart field under the fixed name ``temp.wav`` with
            content type ``audio/wav``, whatever the file is actually called.

    Returns:
        The response body decoded from JSON.
    """
    url = "https://dev-phonic-api.vuihoc.vn/api/v3/get_score_from_file"

    payload = {'title': title}
    headers = {
        'accept': 'application/json',
        # NOTE(review): this bearer token is hard-coded in source. It should be
        # rotated and loaded from configuration/secrets instead.
        'Authorization': 'Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6ODg2MSwiZGV2aWNlSWQiOjg4NjEsImlhdCI6MTcxMTk2MTY2OSwiZXhwIjoxNzE3MTQ1NjY5fQ.8Wtzyx3mKVh7K9_GNzseWdK1NH-hycYdNh1uFsoVvEg'
    }

    # Keep the file open only for the duration of the upload so the handle is
    # released even when the request raises.
    with open(audio_file_path, 'rb') as audio_file:
        files = [
            ('audio', ('temp.wav', audio_file, 'audio/wav'))
        ]
        response = requests.request("POST", url, headers=headers, data=payload, files=files)

    return response.json()

def create_html_output(real_transcript, is_letter_correct_all_words):
    """Render a transcript as HTML with every character coloured by correctness.

    Args:
        real_transcript: Text to render; each character gets its own ``<span>``.
        is_letter_correct_all_words: Sequence indexed in parallel with
            ``real_transcript``. A character is green when the entry at its
            index equals ``'1'`` and red otherwise. Characters that have no
            corresponding entry are rendered red instead of raising.

    Returns:
        The HTML string, wrapped in ``<center><h1>...</h1></center>``.
    """
    flag_count = len(is_letter_correct_all_words)
    spans = []
    for i, char in enumerate(real_transcript):
        is_correct = i < flag_count and is_letter_correct_all_words[i] == '1'
        colour = "green" if is_correct else "red"
        # Escape the character so that '<', '&' or quotes coming from the
        # transcript are displayed literally rather than parsed as markup.
        spans.append(f"<span style='color:{colour};'>{html.escape(str(char))}</span>")
    return "<center><h1>" + "".join(spans) + "</h1></center>"

def create_html_output_ipa(word_score_list):
    """Render per-phone IPA symbols as coloured HTML.

    Args:
        word_score_list: Iterable of dicts, each with a ``"phone_score_list"``
            entry holding dicts that provide ``"quality_score"`` and
            ``"phone_ipa"``.

    Returns:
        An HTML string wrapped in ``<center><h1>...</h1></center>``. A phone
        is green when its ``quality_score`` equals 100 and red otherwise; a
        red span containing a single space follows every word.
    """
    pieces = ["<center><h1>"]
    for entry in word_score_list:
        for phone in entry["phone_score_list"]:
            colour = "green" if phone["quality_score"] == 100 else "red"
            pieces.append(f"<span style='color:{colour};'>{phone['phone_ipa']}</span>")
        # Word separator, emitted after each word including the last one.
        pieces.append("<span style='color:red;'> </span>")
    pieces.append("</h1></center>")
    return "".join(pieces)

def download_audio_file(url, filename=None):
    """Download an audio file from a URL and save it to disk.

    Args:
        url (str): URL of the audio file to download.
        filename (str, optional): Name of the file to write. When omitted,
            the last segment of the URL path is used (query string and
            fragment are ignored); if the path has no usable final segment,
            ``"downloaded_audio"`` is used.

    Returns:
        str: The name of the file the content was written to.

    Raises:
        requests.HTTPError: If the server responds with an error status.
    """
    response = requests.get(url)
    response.raise_for_status()  # Surface HTTP errors before writing anything

    # No name supplied: derive one from the URL path only, so that
    # "clip.wav?token=abc" is saved as "clip.wav" and a URL ending in "/"
    # does not produce an empty (unopenable) file name.
    if not filename:
        filename = posixpath.basename(urlsplit(url).path)
        if filename in ("", ".", ".."):
            filename = "downloaded_audio"

    with open(filename, "wb") as f:
        f.write(response.content)

    return filename


def pa_check(url_audio, microphone, file_upload, reference_text):
    """Score an audio sample against a reference text and build the UI outputs.

    The audio source is chosen in this order: ``url_audio`` (downloaded
    first), then ``microphone``, then ``file_upload``.

    Args:
        url_audio: URL of an audio file to download; used when truthy.
        microphone: Path of the microphone recording, or ``None``.
        file_upload: Path of the uploaded audio file, or ``None``.
        reference_text: Text forwarded to the scoring request.

    Returns:
        A 3-tuple ``(text, transcript_html, ipa_html)`` matching the three
        interface outputs. ``text`` is the pretty-printed JSON response,
        prefixed with a warning when both microphone and upload were given.
        When no audio source is provided, ``text`` is an error message and
        both HTML values are empty strings. When the response cannot be
        rendered, ``transcript_html`` is an error message and ``ipa_html`` is
        an empty string.
    """
    warn_output = ""
    if url_audio:
        file = download_audio_file(url_audio)
    else:
        if (microphone is None) and (file_upload is None):
            # Always return one value per output component.
            return "ERROR: You have to either use the microphone or upload an audio file", "", ""

        if (microphone is not None) and (file_upload is not None):
            warn_output = (
                "WARNING: You've uploaded an audio file and used the microphone. "
                "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
            )

        file = microphone if microphone is not None else file_upload

    result = send_req(reference_text, file)
    try:
        data = result["data"]
        html_output = create_html_output(data["real_transcripts"], data["is_letter_correct_all_words"])
        html_output_ipa = create_html_output_ipa(data["word_score_list"])
    except (KeyError, IndexError, TypeError) as e:
        # The response did not have the expected shape. Log the whole payload
        # rather than indexing into it again, which could raise a second time.
        print(e)
        print(result)
        html_output = "ERROR: Something went wrong with the server response. Please try again later."
        html_output_ipa = ""

    return warn_output + json.dumps(result, indent=4, ensure_ascii=False), html_output, html_output_ipa


# Input components, in the positional order of pa_check's parameters:
# audio URL, microphone recording, uploaded file, reference text.
input_components = [
    gr.Textbox(label="Url audio", type="text", placeholder="Download audio form url"),
    gr.Audio(sources="microphone", type="filepath"),
    gr.Audio(sources="upload", type="filepath"),
    gr.Textbox(label="Reference text", type="text", placeholder="How are you?|What is your name?"),
]

# Output components, one per value returned by pa_check:
# a text box followed by two HTML panes.
output_components = [
    gr.Textbox(label="Output"),
    "html",
    "html",
]

demo = gr.Interface(
    fn=pa_check,
    inputs=input_components,
    outputs=output_components,
    theme="huggingface",
    title="Pronunciation Assessment",
    allow_flagging="never",
)

# Login credentials are read from the environment; a missing variable raises
# KeyError here, before the app is launched.
credentials = (os.environ['username'], os.environ['password'])
demo.launch(auth=credentials)