File size: 2,589 Bytes
f802c6e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c3d334
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
# AUTOGENERATED! DO NOT EDIT! File to edit: app.ipynb.

# %% auto 0
# Names exported by `from <module> import *`: the Gradio interface object and
# the two transcription functions defined below.
__all__ = ['mf_transcribe', 'transcribe_malayalam_speech', 'gr_transcribe_malayalam_speech']

# %% app.ipynb 4
import gradio as gr
from faster_whisper import WhisperModel

# %% app.ipynb 8
def transcribe_malayalam_speech(audio_file, compute_type="int8", device="cpu", folder="vegam-whisper-medium-ml-fp16"):
    """Transcribe one audio input and return the recognised text.

    Args:
        audio_file: Audio input handed unchanged to ``WhisperModel.transcribe``.
        compute_type: Forwarded to ``WhisperModel`` as ``compute_type``.
        device: Forwarded to ``WhisperModel`` as ``device``.
        folder: Model location, passed as the first argument to ``WhisperModel``.

    Returns:
        The ``text`` of every returned segment, joined with single spaces.
    """
    # NOTE: a fresh model object is constructed on every call.
    whisper = WhisperModel(folder, device=device, compute_type=compute_type)
    segments, _info = whisper.transcribe(audio_file, beam_size=5)

    return " ".join(segment.text for segment in segments)

# %% app.ipynb 9
def gr_transcribe_malayalam_speech(microphone, file_upload, compute_type="int8", device="cpu", folder="vegam-whisper-medium-ml-fp16"):
    """Gradio callback: transcribe the microphone recording or the uploaded file.

    When both inputs are supplied the microphone recording wins and a warning
    is prepended to the transcript so the user knows the upload was ignored.

    Args:
        microphone: Audio from the microphone input, or ``None`` if unused.
        file_upload: Audio from the upload input, or ``None`` if unused.
        compute_type: Forwarded to ``transcribe_malayalam_speech``.
        device: Forwarded to ``transcribe_malayalam_speech``.
        folder: Forwarded to ``transcribe_malayalam_speech``.

    Returns:
        The transcript (preceded by the warning when both inputs were given),
        or an ``"ERROR: ..."`` message when neither input was provided.
    """
    # Guard first: nothing to transcribe, so no model needs to be loaded.
    if microphone is None and file_upload is None:
        return "ERROR: You have to either use the microphone or upload an audio file"

    warn_output = ""
    if microphone is not None and file_upload is not None:
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )

    audio_file = microphone if microphone is not None else file_upload

    # Reuse the plain transcription function instead of duplicating its body.
    transcript = transcribe_malayalam_speech(
        audio_file, compute_type=compute_type, device=device, folder=folder
    )

    # Previously the warning was built but never returned to the user.
    return warn_output + transcript

# %% app.ipynb 16
# Gradio UI definition. The two audio inputs are listed microphone first, then
# upload, matching the positional parameters of gr_transcribe_malayalam_speech.
# Both are optional and deliver a file path to the callback.
mf_transcribe = gr.Interface(
    title="PALLAKKU (പല്ലക്ക്)",
    description="Pallakku is a Malayalam speech to text demo leveraging the model-weights of [vegam-whisper-medium-ml](https://huggingface.co/kurianbenoy/vegam-whisper-medium-ml-fp16).",
    article="Please note that this demo now uses CPU only and in my testing for a 5 seconds audio file it can take upto 15 seconds for results to come. If you are interested to use a GPU based API instead, feel free to contact the author @ [email protected]",
    fn=gr_transcribe_malayalam_speech,
    inputs=[
        gr.inputs.Audio(source=audio_source, type="filepath", optional=True)
        for audio_source in ("microphone", "upload")
    ],
    outputs="text",
    allow_flagging="never",
)

# %% app.ipynb 17
# Runs at import/execution time: launches the interface defined above.
# NOTE(review): share=False presumably disables the public share link —
# confirm against the Gradio version in use.
mf_transcribe.launch(share=False)