File size: 3,155 Bytes
7b18d60
 
502159a
eb134bd
d9a4d6b
7b18d60
9038461
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import gradio as gr
import time
from transformers import pipeline
import torch
import ffmpeg  # Make sure it's ffmpeg-python

def main():
    """Build and launch the Gradio speech-to-text demo.

    Loads the wav2vec2 automatic-speech-recognition pipeline (on CUDA
    device 0 when available, otherwise with the pipeline's default device),
    builds a Blocks UI offering three audio sources (microphone recording,
    uploaded file, m3u8 stream URL) and starts the server with
    ``demo.launch()``.
    """
    # Function-scope imports keep this block self-contained; they are all
    # standard library.
    import logging
    import os
    import tempfile
    from urllib.parse import urlparse

    logger = logging.getLogger(__name__)

    model_id = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"

    # Only pass ``device`` when a GPU is present so the CPU path keeps the
    # pipeline's own default, exactly as before.
    pipeline_kwargs = {}
    if torch.cuda.is_available():
        pipeline_kwargs["device"] = 0
    p = pipeline(
        "automatic-speech-recognition",
        model=model_id,
        **pipeline_kwargs,
    )

    def remove_quietly(path):
        """Delete *path*, ignoring the error if it cannot be removed."""
        try:
            os.remove(path)
        except OSError:
            pass

    def extract_audio_from_m3u8(url):
        """Save the audio of the stream at *url* to a temporary .aac file.

        Args:
            url: http(s) URL of the m3u8 stream.

        Returns:
            Path of the extracted audio file, or ``None`` when the URL is
            rejected or ffmpeg fails. The caller owns the file and is
            responsible for deleting it.
        """
        # The URL comes straight from a web form; ffmpeg will happily open
        # local files and other protocols, so only allow http(s).
        if urlparse(url).scheme not in ("http", "https"):
            logger.error("Rejected m3u8 URL with unsupported scheme: %r", url)
            return None

        # A unique file per request: a fixed name would be overwritten when
        # two users transcribe at the same time.
        fd, output_file = tempfile.mkstemp(suffix=".aac")
        os.close(fd)
        try:
            ffmpeg.input(url).output(output_file).run(overwrite_output=True)
        except Exception:
            # UI boundary: log the cause and report failure to the caller
            # instead of returning an error string that looks like a path.
            logger.exception("Failed to extract audio from %r", url)
            remove_quietly(output_file)
            return None
        return output_file

    def transcribe_function(audio, state, uploaded_audio, m3u8_url):
        """Transcribe one audio source and append the text to the transcript.

        Source priority is: uploaded file, then m3u8 URL, then microphone.

        Args:
            audio: File path of the microphone recording, or ``None``.
            state: Transcript accumulated so far.
            uploaded_audio: File path of the uploaded audio, or ``None``.
            m3u8_url: Text entered in the m3u8 URL box (may be empty).

        Returns:
            A dict mapping ``transcription_var`` and ``state_var`` to their
            new values.
        """
        m3u8_url = (m3u8_url or "").strip()
        temp_audio = None

        if uploaded_audio is not None:
            # An upload wins over the URL, so do not download the stream.
            audio = uploaded_audio
        elif m3u8_url:
            temp_audio = extract_audio_from_m3u8(m3u8_url)
            if temp_audio is None:
                return {
                    transcription_var: (
                        "An error occurred while extracting audio "
                        "from the m3u8 URL."
                    ),
                    state_var: state,
                }
            audio = temp_audio

        if not audio:
            # Nothing to transcribe: leave the transcript as it is.
            return {state_var: state, transcription_var: state}

        try:
            # NOTE(review): fixed 3 s delay kept from the original; its
            # purpose is not evident from this file - confirm it is needed.
            time.sleep(3)
            text = p(audio, chunk_length_s=50)["text"]
        except Exception:
            logger.exception("Transcription failed for %r", audio)
            return {
                transcription_var: "An error occurred during transcription.",
                state_var: state,
            }
        finally:
            # Only files created here for the m3u8 stream are removed.
            if temp_audio is not None:
                remove_quietly(temp_audio)

        state += text + "\n"
        return {state_var: state, transcription_var: state}

    def reset_output(transcription, state):
        """Clear both the visible transcription and the stored transcript."""
        return "", ""

    with gr.Blocks() as demo:
        # Per-session transcript accumulated across button clicks.
        state_var = gr.State("")

        with gr.Row():
            with gr.Column():
                microphone = gr.Audio(
                    source="microphone",
                    type="filepath",
                    label="Microphone"
                )
                uploaded_audio = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    source="upload"
                )
                m3u8_url = gr.Textbox(
                    label="m3u8 URL | E.g.: from kvf.fo or logting.fo"
                )

            with gr.Column():
                # ``interactive=False`` is the supported way to make the box
                # read-only; ``readonly`` is not a Textbox parameter.
                transcription_var = gr.Textbox(
                    type="text",
                    label="Transcription",
                    interactive=False
                )

        with gr.Row():
            transcribe_button = gr.Button("Transcribe")
            reset_button = gr.Button("Reset output")

        transcribe_button.click(
            transcribe_function,
            [microphone, state_var, uploaded_audio, m3u8_url],
            [transcription_var, state_var]
        )

        reset_button.click(
            reset_output,
            [transcription_var, state_var],
            [transcription_var, state_var]
        )

    # Start the web server for the demo.
    demo.launch()

# Launch the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()