Spaces:
Runtime error
Runtime error
File size: 2,062 Bytes
e48a855 b5157ee e8e1ee0 e48a855 0c81086 e48a855 967d7b8 b7a67ca e48a855 0c81086 e48a855 0c81086 e48a855 0c81086 e48a855 0c81086 967d7b8 e48a855 e8e1ee0 967d7b8 e8e1ee0 e48a855 e8e1ee0 01f871e e8e1ee0 e48a855 967d7b8 e48a855 967d7b8 b8622b9 dd040d4 c41db98 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
import gradio as gr
import os
import torch
from transformers import (
WhisperForConditionalGeneration,
WhisperTokenizer,
WhisperProcessor,
pipeline
)
# Use half precision only when a CUDA device is available; otherwise float32.
torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
model_id = "sudoping01/whosper-large"
# Read the Hub token once so every from_pretrained call below uses the same
# credentials (None when HF_TOKEN is unset).
hf_token = os.environ.get("HF_TOKEN")

# Load model and create pipeline
# The weights are loaded directly in `torch_dtype`. The pipeline's own
# `torch_dtype` argument does not convert a model object that is already
# instantiated, so omitting it here would leave the model in float32 while
# the pipeline is configured for float16 on GPU.
model = WhisperForConditionalGeneration.from_pretrained(
    model_id,
    torch_dtype=torch_dtype,
    device_map="auto",
    use_cache=True,
    attention_dropout=0.1,
    dropout=0.1,
    token=hf_token,
)

# Decoding defaults stored on the model config.
# NOTE(review): newer transformers releases take generation defaults from
# `model.generation_config`; confirm these config-level values are honoured
# by the installed version.
model.config.suppress_tokens = []
model.config.no_repeat_ngram_size = 3
model.config.early_stopping = True
model.config.max_length = 448
model.config.num_beams = 5

# Pass the same token as for the model so a private/gated repo loads
# consistently for all three components.
tokenizer = WhisperTokenizer.from_pretrained(model_id, token=hf_token)
processor = WhisperProcessor.from_pretrained(model_id, token=hf_token)
feature_extractor = processor.feature_extractor

# Speech-recognition pipeline: audio is processed in 30-second chunks with a
# 3-second stride, one chunk per batch, and no timestamps are returned.
pipe = pipeline(
    "automatic-speech-recognition",
    model=model,
    tokenizer=tokenizer,
    feature_extractor=feature_extractor,
    torch_dtype=torch_dtype,
    chunk_length_s=30,
    stride_length_s=3,
    return_timestamps=False,
    batch_size=1,
)
def transcribe(audio):
    """Transcribe one audio input with the module-level ASR pipeline.

    Args:
        audio: Audio value to transcribe, passed straight to ``pipe``;
            ``None`` when no audio was supplied.

    Returns:
        The recognised text on success, a prompt asking for input when
        ``audio`` is ``None``, or an "Error during transcription: ..."
        message if anything raised while transcribing.
    """
    # Guard clause: nothing to transcribe.
    if audio is None:
        return "Please provide an audio input."

    # Sampling disabled, 5-beam search; forwarded to generation unchanged.
    decoding_options = {
        "temperature": 0.0,
        "do_sample": False,
        "num_beams": 5,
        "length_penalty": 1.0,
        "repetition_penalty": 1.2,
    }

    try:
        output = pipe(audio, generate_kwargs=decoding_options)
        return output["text"]
    except Exception as exc:
        # Report the failure as the returned text instead of propagating it.
        return f"Error during transcription: {exc}"
# Build the Gradio UI: one audio input (microphone recording or file upload,
# delivered to the callback as a file path) and one text box for the result.
_audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath")
_transcript_box = gr.Textbox(label="Transcription")

demo = gr.Interface(
    title="Multilingual Speech Recognition: Wolof, French, English, .. or Mix",
    description="Upload an audio file or record audio to transcribe Wolof, French, or English speech...",
    fn=transcribe,
    inputs=[_audio_input],
    outputs=_transcript_box,
    theme="default",
)

# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()