File size: 2,994 Bytes
5a10bfe
5e825a2
 
5a10bfe
5e825a2
 
 
5a10bfe
 
 
 
5e825a2
5a10bfe
 
5e825a2
5a10bfe
5e825a2
 
 
 
 
 
 
 
 
 
 
 
 
 
5a10bfe
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5e825a2
 
 
 
 
 
 
 
 
 
 
 
5a10bfe
5e825a2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5a10bfe
5e825a2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
import requests
import gradio as gr
import os
import urllib.request

from gradio_client import Client

# --- Runtime configuration (every value is read from an environment variable) ---
# 'link'       : target handed to gradio_client.Client (presumably a Hugging Face
#                Space, going by the variable name -- confirm against deployment).
# 'local_link' : URL probed once at start-up to decide which backend to use.
# 'local_api'  : endpoint that s2t_local() POSTs audio files to.
hf_link = os.environ['link']
client = Client(hf_link)
local_link = os.environ['local_link']
local_api = os.environ['local_api']

# Probe the local backend once, at import time. s2t() routes to the local API
# only when this value is exactly 200. An unreachable host or a timeout must not
# abort start-up, so those cases are recorded as None, which makes s2t() fall
# back to the remote client. The timeout keeps a dead host from hanging forever.
try:
    local_code = requests.head(local_link, timeout=10).status_code
except requests.RequestException:
    local_code = None
print(local_code)

def s2t_hf(source, mic, fi, lang):
    """Transcribe audio through the remote Gradio client.

    Args:
        source: 'file' selects ``fi``; any other value selects ``mic``.
        mic: Path of the microphone recording.
        fi: Path of the uploaded file.
        lang: Unused here; kept so the signature matches ``s2t_local``.

    Returns:
        Whatever ``client.predict`` returns for the ``/predict`` endpoint.
    """
    audio_path = fi if source == 'file' else mic

    # Positional arguments, in order: the audio input (file path or URL),
    # the task name, and the "return timestamps" flag.
    transcription = client.predict(
        audio_path,
        "transcribe",
        True,
        api_name="/predict",
    )
    print(transcription)
    return transcription

def s2t_local(source, mic, fi, lang):
    """Transcribe audio by uploading it to the local API endpoint.

    Args:
        source: 'file' selects ``fi``; any other value selects ``mic``.
        mic: Path of the microphone recording.
        fi: Path of the uploaded file.
        lang: Language code sent to the API as the ``language`` form field.

    Returns:
        The decoded JSON body of the API response.

    Raises:
        OSError: If the selected audio file cannot be opened.
        requests.RequestException: If the HTTP request fails.
    """
    in_file = fi if source == 'file' else mic
    form_data = {"language": lang}

    # Open the audio inside a context manager so the file handle is closed
    # once the upload finishes, even if the request raises.
    with open(in_file, 'rb') as audio:
        files = {'input_file': audio}
        print(files)
        response = requests.post(local_api, files=files, data=form_data, timeout=10000)

    return response.json()

def s2t(source, mic, fi, lang):
    """Run speech-to-text on the backend chosen by the start-up probe.

    Uses ``s2t_local`` when the module-level ``local_code`` is 200 and
    ``s2t_hf`` otherwise; all arguments are forwarded unchanged.
    """
    backend = s2t_local if local_code == 200 else s2t_hf
    return backend(source, mic, fi, lang)

def update_audio_ui(audio_source: str, input_audio_mic, input_audio_file):
    """Build the updates that swap the visible audio input widget.

    Args:
        audio_source: Selected source; "microphone" shows the mic widget,
            anything else shows the file widget.
        input_audio_mic: Current mic value (not used).
        input_audio_file: Current file value (not used).

    Returns:
        A ``(mic_update, file_update)`` pair. Each update sets visibility
        and resets the component value to ``None``.
    """
    use_mic = audio_source == "microphone"
    mic_update = gr.update(visible=use_mic, value=None)
    file_update = gr.update(visible=not use_mic, value=None)
    return mic_update, file_update

# Build the UI: a language/source selector, two audio inputs (only one visible
# at a time), a JSON output panel, and Run / Clear buttons.
with gr.Blocks() as demo:
    gr.Markdown('<h1 style="text-align: center;">Speech to Text</h1>')
    with gr.Group():
        with gr.Row() as audio_box:
            with gr.Column():
                # Language code passed to s2t() as `lang`.
                input_lang = gr.Dropdown(['auto','vi', 'ja', 'en', 'cn', 'ko'], label='Language?', value='en', interactive=True)
                # These two choice strings are compared literally in s2t_hf /
                # s2t_local ('file') and update_audio_ui ("microphone").
                audio_source = gr.Radio(
                    label="Audio source",
                    choices=["file", "microphone"],
                    value="file",
                    interactive=True
                )
            # Microphone input; hidden initially because the default source is "file".
            input_audio_mic = gr.Audio(
                label="Input speech",
                type="filepath",
                sources="microphone",
                visible=False,
            )
            # Upload input; visible initially.
            input_audio_file = gr.Audio(
                label="Input speech",
                type="filepath",
                sources="upload",
                visible=True,
            )
        # Output panel that receives the value returned by s2t().
        js = gr.JSON(label="json")
        with gr.Row():
            btn = gr.Button("Run")
            # Clear button attached to both audio inputs and the JSON output.
            btn_clean = gr.ClearButton([input_audio_mic, input_audio_file, js])

    # When the source selection changes, update_audio_ui toggles which audio
    # widget is visible and resets both values.
    audio_source.change(
        fn=update_audio_ui,
        inputs=[audio_source, input_audio_mic, input_audio_file],
        outputs=[
            input_audio_mic,
            input_audio_file,
        ],
        queue=False,
        api_name=False,
    )


    # Run: pass the source, both audio paths and the language to s2t and show
    # its result in the JSON panel.
    btn.click(fn=s2t, inputs=[audio_source, input_audio_mic, input_audio_file, input_lang], outputs=[js])

# Launch the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()