# Spaces: Sleeping
import logging
import os
import tempfile
import time
from urllib.parse import urlparse

import ffmpeg  # Make sure it's ffmpeg-python
import gradio as gr
import torch
from transformers import pipeline
def main():
    """Build and launch the Faroese speech-to-text Gradio demo.

    Loads a wav2vec2 automatic-speech-recognition pipeline (on GPU 0 when
    CUDA is available), builds a UI with three audio sources (microphone
    recording, uploaded file, m3u8 stream URL) and launches the Gradio app.
    """
    log = logging.getLogger(__name__)
    model_id = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"

    # Pass `device` only when CUDA is present so the CPU path keeps the
    # pipeline's own default device selection.
    device_kwargs = {"device": 0} if torch.cuda.is_available() else {}
    p = pipeline(
        "automatic-speech-recognition",
        model=model_id,
        **device_kwargs,
    )

    def remove_quietly(path):
        """Delete *path*; a failure to delete is logged, never raised."""
        try:
            os.remove(path)
        except OSError:
            log.debug("Could not remove temporary file %r", path, exc_info=True)

    def extract_audio_from_m3u8(url):
        """Save the audio of the stream at *url* to a temporary ``.aac`` file.

        Args:
            url: http(s) URL of the m3u8 playlist.

        Returns:
            Path of the extracted audio file, or ``None`` when the URL is
            rejected or ffmpeg fails. The caller owns (and must delete) the
            returned file.
        """
        # The URL comes straight from a text box; only allow web URLs so
        # ffmpeg is never pointed at local files or other protocols.
        if urlparse(url).scheme not in ("http", "https"):
            log.warning("Rejected non-HTTP(S) stream URL: %r", url)
            return None

        # A unique file per request: a fixed name in the working directory
        # would be overwritten by concurrent requests.
        fd, output_file = tempfile.mkstemp(suffix=".aac")
        os.close(fd)
        try:
            ffmpeg.input(url).output(output_file).run(overwrite_output=True)
        except Exception:
            log.exception("ffmpeg could not extract audio from %r", url)
            remove_quietly(output_file)
            return None
        return output_file

    def transcribe_function(audio, state, uploaded_audio, m3u8_url):
        """Transcribe one audio input and append the text to the transcript.

        Source precedence: uploaded file, then m3u8 URL, then microphone.

        Args:
            audio: File path of the microphone recording, or ``None``.
            state: Transcript accumulated so far.
            uploaded_audio: File path of the uploaded audio, or ``None``.
            m3u8_url: Stream URL typed by the user (may be empty).

        Returns:
            Dict mapping ``state_var`` and ``transcription_var`` to their
            new values.
        """
        temp_audio = None
        stream_url = (m3u8_url or "").strip()

        if uploaded_audio is not None:
            # The upload wins, so do not download the stream at all.
            audio = uploaded_audio
        elif stream_url:
            temp_audio = extract_audio_from_m3u8(stream_url)
            if temp_audio is None:
                return {
                    transcription_var: (
                        "An error occurred while extracting audio from the "
                        "m3u8 URL."
                    ),
                    state_var: state,
                }
            audio = temp_audio

        if not audio:
            # Nothing to transcribe; show the transcript unchanged.
            return {state_var: state, transcription_var: state}

        try:
            text = p(audio, chunk_length_s=50)["text"]
        except Exception:
            log.exception("Transcription failed for %r", audio)
            return {
                transcription_var: "An error occurred during transcription.",
                state_var: state,
            }
        finally:
            if temp_audio is not None:
                remove_quietly(temp_audio)

        state += text + "\n"
        return {state_var: state, transcription_var: state}

    def reset_output(transcription, state):
        """Clear both the visible transcription and the stored transcript."""
        return "", ""

    with gr.Blocks() as demo:
        state_var = gr.State("")

        with gr.Row():
            with gr.Column():
                # NOTE(review): `source=` is the Gradio 3.x keyword; Gradio 4
                # renamed it to `sources=[...]` -- confirm the pinned version.
                microphone = gr.Audio(
                    source="microphone",
                    type="filepath",
                    label="Microphone",
                )
                uploaded_audio = gr.Audio(
                    label="Upload Audio File",
                    type="filepath",
                    source="upload",
                )
                m3u8_url = gr.Textbox(
                    label="m3u8 URL | E.g.: from kvf.fo or logting.fo",
                )
            with gr.Column():
                # `interactive=False` makes the box read-only; it must be set
                # explicitly because the box is also an input of the reset
                # handler.
                transcription_var = gr.Textbox(
                    type="text",
                    label="Transcription",
                    interactive=False,
                )

        with gr.Row():
            transcribe_button = gr.Button("Transcribe")
            reset_button = gr.Button("Reset output")

        transcribe_button.click(
            transcribe_function,
            [microphone, state_var, uploaded_audio, m3u8_url],
            [transcription_var, state_var],
        )
        reset_button.click(
            reset_output,
            [transcription_var, state_var],
            [transcription_var, state_var],
        )

    # Start the Gradio app.
    demo.launch()
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()