Spaces:
Running
Running
import logging
import time

import gradio as gr
import numpy as np
import torch
from transformers import pipeline
# Detect CUDA once at import time; the flag decides where the model runs.
use_gpu = torch.cuda.is_available()

# Build the speech-recognition pipeline a single time. The ``device`` keyword
# is supplied only when a GPU was detected (index 0); otherwise the pipeline is
# created without it and uses its own default device.
p = pipeline(
    "automatic-speech-recognition",
    model="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h",
    **({"device": 0} if use_gpu else {}),
)

# Size of each piece that transcribe() cuts the incoming audio into; tune as
# needed.
chunk_size = 30
def _to_mono_float32(samples):
    """Convert recorded samples to a one-dimensional float32 waveform."""
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        # Integer PCM is rescaled by the full-scale value of its dtype.
        full_scale = float(np.iinfo(samples.dtype).max)
        samples = samples.astype(np.float32) / full_scale
    else:
        samples = samples.astype(np.float32)
    if samples.ndim > 1:
        # Multi-channel audio is expected as (samples, channels); the ASR
        # pipeline only accepts a single channel, so average them.
        samples = samples.mean(axis=1)
    return samples


def transcribe(audio, state="", uploaded_audio=None):
    """Transcribe recorded or uploaded audio and append the text to ``state``.

    Args:
        audio: Microphone recording in the form produced by a Gradio audio
            input with ``type="numpy"``: a ``(sample_rate, samples)`` tuple,
            or ``None`` when nothing was recorded.
        state: Transcript accumulated by previous calls.
        uploaded_audio: Uploaded recording in the same form as ``audio``.
            When it is not ``None`` it is used instead of ``audio``.

    Returns:
        A ``(text, state)`` tuple. On success both items are the updated
        transcript. When there is no audio both items are the unchanged
        ``state``. On failure ``text`` is a fixed error message and ``state``
        holds whatever had been transcribed before the failure.

    The waveform is cut into pieces of ``chunk_size`` seconds and each piece is
    sent to the module-level pipeline ``p`` together with its sampling rate, so
    the pipeline can resample it to the rate the model expects.
    """
    if uploaded_audio is not None:
        audio = uploaded_audio
    # NumPy arrays are kept out of the truthiness test because ``not array``
    # raises for arrays with more than one element.
    if audio is None or (not isinstance(audio, np.ndarray) and not audio):
        return state, state

    try:
        sample_rate, samples = audio
        waveform = _to_mono_float32(samples)
        if waveform.size == 0:
            # A recording without samples is treated like "no audio".
            return state, state

        state += "Transcribing...\n"
        samples_per_chunk = max(1, int(chunk_size * sample_rate))
        for start in range(0, len(waveform), samples_per_chunk):
            piece = waveform[start:start + samples_per_chunk]
            # A fresh dict per call: the pipeline may consume its keys.
            result = p({"sampling_rate": int(sample_rate), "raw": piece})
            state += result["text"] + "\n"
        return state, state
    except Exception:
        # UI callback boundary: report the failure to the user, but keep the
        # traceback in the logs instead of discarding it.
        logging.getLogger(__name__).exception("Transcription failed")
        return "An error occurred during transcription.", state
# Input components, listed in the order transcribe() declares its parameters:
# microphone recording, carried-over transcript state, uploaded file.
microphone_input = gr.inputs.Audio(source="microphone", type="numpy")
upload_input = gr.inputs.Audio(source="upload", type="numpy", label="Upload Audio File")

# Assemble the live interface and start serving it. The two outputs receive the
# two values transcribe() returns: the textbox text and the updated state.
demo = gr.Interface(
    fn=transcribe,
    inputs=[microphone_input, "state", upload_input],
    outputs=["textbox", "state"],
    live=True,
)
demo.launch()