"""Gradio demo that transcribes speech with a wav2vec2 speech-recognition model.

Audio can come from the microphone, an uploaded file, or an m3u8 stream URL
whose audio track is first saved locally with ffmpeg.
"""

import logging
import time

import ffmpeg  # Make sure it's ffmpeg-python
import gradio as gr
import torch
from transformers import pipeline

logger = logging.getLogger(__name__)

MODEL_NAME = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
EXTRACTED_AUDIO_PATH = "output_audio.aac"


def main():
    """Load the speech-recognition pipeline, build the UI and launch it."""
    # Only pass ``device`` when CUDA is available so the CPU case keeps the
    # pipeline's own default device selection.
    pipeline_kwargs = {"device": 0} if torch.cuda.is_available() else {}
    p = pipeline(
        "automatic-speech-recognition",
        model=MODEL_NAME,
        **pipeline_kwargs,
    )

    def extract_audio_from_m3u8(url):
        """Save the audio found at ``url`` to a local file and return its path.

        Any exception raised while running ffmpeg propagates to the caller;
        an error message is never returned in place of a path.
        """
        # NOTE(review): ``url`` comes straight from the UI textbox and is
        # handed to ffmpeg unvalidated; consider restricting it to http(s).
        ffmpeg.input(url).output(EXTRACTED_AUDIO_PATH).run(overwrite_output=True)
        return EXTRACTED_AUDIO_PATH

    def transcribe_function(audio, state, uploaded_audio, m3u8_url):
        """Transcribe one audio source and append the text to the running state.

        Source priority: uploaded file, then m3u8 URL, then microphone.

        Args:
            audio: File path of the microphone recording, or None.
            state: Transcription text accumulated so far.
            uploaded_audio: File path of the uploaded audio, or None.
            m3u8_url: Stream URL typed by the user; may be empty.

        Returns:
            A dict mapping the state and transcription components to their
            new values.
        """
        if uploaded_audio is not None:
            # The upload wins, so skip the stream download entirely.
            audio = uploaded_audio
        elif m3u8_url:
            try:
                audio = extract_audio_from_m3u8(m3u8_url)
            except Exception:
                # UI boundary: log the real cause and report the failing step
                # instead of feeding an error string to the model as a path.
                logger.exception("Could not extract audio from %r", m3u8_url)
                return {
                    transcription_var: (
                        "An error occurred while extracting audio "
                        "from the m3u8 URL."
                    ),
                    state_var: state,
                }

        if not audio:
            # No audio source was provided; leave the output unchanged.
            return {state_var: state, transcription_var: state}

        try:
            # NOTE(review): the purpose of this fixed delay is not evident
            # from this file; kept as-is.
            time.sleep(3)
            text = p(audio, chunk_length_s=50)["text"]
        except Exception:
            logger.exception("Transcription failed for %r", audio)
            return {
                transcription_var: "An error occurred during transcription.",
                state_var: state,
            }

        state += text + "\n"
        return {state_var: state, transcription_var: state}

    def reset_output(transcription, state):
        """Function to reset the state to an empty string."""
        return "", ""

    with gr.Blocks() as demo:
        state_var = gr.State("")

        with gr.Row():
            with gr.Column():
                microphone = gr.Audio(
                    source="microphone",
                    type="filepath",
                    label="Microphone",
                )
                uploaded_audio = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    source="upload",
                )
                m3u8_url = gr.Textbox(
                    label="m3u8 URL | E.g.: from kvf.fo or logting.fo"
                )
            with gr.Column():
                # ``interactive=False`` is how Gradio spells a read-only
                # component; ``readonly`` is not a Textbox parameter.
                transcription_var = gr.Textbox(
                    type="text",
                    label="Transcription",
                    interactive=False,
                )

        with gr.Row():
            transcribe_button = gr.Button("Transcribe")
            reset_button = gr.Button("Reset output")

        transcribe_button.click(
            transcribe_function,
            [microphone, state_var, uploaded_audio, m3u8_url],
            [transcription_var, state_var],
        )
        reset_button.click(
            reset_output,
            [transcription_var, state_var],
            [transcription_var, state_var],
        )

    # Start the Gradio server.
    demo.launch()


if __name__ == "__main__":
    main()