"""Gradio demo: speech-to-text with a wav2vec2 checkpoint.

The app accepts audio either from the microphone or from an uploaded file,
splits it into fixed-length chunks and appends the recognised text of every
chunk to a running transcript kept in the Gradio session state.
"""

import logging
import time  # noqa: F401  (kept from the original module; no longer used below)

import gradio as gr
import numpy as np
import torch
from transformers import pipeline

logger = logging.getLogger(__name__)

MODEL_NAME = "carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"

# Check if GPU is available
use_gpu = torch.cuda.is_available()

# Configure the pipeline to use the GPU if available; otherwise leave the
# device argument out entirely so the library default is used.
_device_kwargs = {"device": 0} if use_gpu else {}
p = pipeline("automatic-speech-recognition", model=MODEL_NAME, **_device_kwargs)

chunk_size = 30  # Seconds of audio sent to the model per call; adjust as needed


def _prepare_audio(audio):
    """Convert a Gradio ``(sample_rate, data)`` pair to mono float32 samples.

    Args:
        audio: Two-item sequence ``(sample_rate, data)`` as produced by a
            Gradio audio input configured with ``type="numpy"``.

    Returns:
        Tuple ``(sample_rate, samples)`` where ``samples`` is a 1-D float32
        array. Integer input is scaled by the maximum of its dtype.

    Raises:
        TypeError, ValueError: If ``audio`` cannot be unpacked into two items.
    """
    sample_rate, data = audio
    data = np.asarray(data)
    if np.issubdtype(data.dtype, np.integer):
        # Integer PCM -> floats in roughly [-1, 1], the range the model expects.
        scale = float(np.iinfo(data.dtype).max)
        data = data.astype(np.float32) / scale
    else:
        data = data.astype(np.float32)
    if data.ndim > 1:
        # assumes layout (samples, channels) — average the channels to get mono
        data = data.mean(axis=1)
    return int(sample_rate), data


def transcribe(audio, state="", uploaded_audio=None):
    """Transcribe audio chunk by chunk and append the text to ``state``.

    Args:
        audio: Microphone recording as a ``(sample_rate, data)`` pair, or
            ``None`` when nothing has been recorded.
        state: Transcript accumulated by previous calls. ``None`` is treated
            as an empty transcript.
        uploaded_audio: Optional uploaded recording in the same format as
            ``audio``. When given, it takes precedence over ``audio``.

    Returns:
        Tuple ``(text, state)``: the text to display and the new session
        state. On success both are the updated transcript. If transcription
        fails, ``text`` is a fixed error message and ``state`` holds whatever
        had been accumulated before the failure.
    """
    if uploaded_audio is not None:
        audio = uploaded_audio
    if state is None:
        state = ""
    # Explicit emptiness test: ``not audio`` would raise on a bare ndarray.
    if audio is None or (isinstance(audio, (tuple, list, str)) and not audio):
        return state, state

    try:
        sample_rate, samples = _prepare_audio(audio)
        if samples.size == 0:
            return state, state

        state += "Transcribing...\n"
        # chunk_size is expressed in seconds, so convert it to a sample count.
        samples_per_chunk = max(1, int(chunk_size * sample_rate))
        for start in range(0, len(samples), samples_per_chunk):
            chunk = samples[start:start + samples_per_chunk]
            # Passing the sampling rate lets the pipeline resample if needed.
            text = p({"raw": chunk, "sampling_rate": sample_rate})["text"]
            state += text + "\n"
        return state, state
    except Exception:
        # UI boundary: keep the app responsive, but do not lose the traceback.
        logger.exception("Transcription failed")
        return "An error occurred during transcription.", state


gr.Interface(
    fn=transcribe,
    inputs=[
        gr.inputs.Audio(source="microphone", type="numpy"),
        'state',
        gr.inputs.Audio(label="Upload Audio File", type="numpy", source="upload"),
    ],
    outputs=[
        "textbox",
        "state",
    ],
    live=True,
).launch()