Spaces:
Running
on
Zero
Running
on
Zero
File size: 839 Bytes
f4c725a c8672f7 7f74ea0 f4c725a c8672f7 f4c725a 7f74ea0 f4c725a 5ece751 f4c725a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
import math

from transformers import pipeline
from accelerate import Accelerator
import spaces
import librosa
# Model identifier handed to `pipeline(model=...)` in `load_model`.
model_id = "JacobLinCool/whisper-large-v3-turbo-common_voice_19_0-zh-TW"
# Module-wide ASR pipeline; stays None until `load_model` assigns it.
pipe = None
def load_model():
    """Build the speech-recognition pipeline and store it in the global ``pipe``.

    The pipeline is created for ``model_id`` on the device reported by
    ``Accelerator().device``.
    """
    global pipe
    pipe = pipeline(
        "automatic-speech-recognition",
        model=model_id,
        device=Accelerator().device,
    )
def get_gpu_duration(audio: str) -> int:
    """Estimate the GPU duration to request for transcribing ``audio``.

    Passed as the ``duration`` callback of ``@spaces.GPU`` on
    ``transcribe_audio_local``, so it takes the same argument as that function.

    The estimate is one unit per started minute of audio, never less than 1.

    NOTE(review): ZeroGPU documents ``duration`` in seconds; confirm that one
    second per minute of audio is the intended budget.

    Args:
        audio: Path of the audio file to measure.

    Returns:
        The audio length in minutes rounded up to a whole number, at least 1.
    """
    y, sr = librosa.load(audio)
    # Audio length in minutes (get_duration is divided by 60 exactly once).
    duration = librosa.get_duration(y=y, sr=sr) / 60.0
    # Round up with a single ceiling. Ceiling-dividing by 60 a second time
    # would collapse every clip shorter than 61 minutes to the floor of 1.
    gpu_duration = math.ceil(duration)
    print(f"{duration=}, {gpu_duration=}")
    return max(1, gpu_duration)
@spaces.GPU(duration=get_gpu_duration)
def transcribe_audio_local(audio: str) -> str:
    """Transcribe ``audio`` with the module-level pipeline and return its text.

    The pipeline is created on first use via ``load_model``. The input and the
    raw pipeline output are both printed for debugging.

    Args:
        audio: Audio input forwarded unchanged to the pipeline.

    Returns:
        The ``"text"`` entry of the pipeline output.
    """
    print(f"{audio=}")

    # Lazy initialisation: only build the pipeline when a request needs it.
    if pipe is None:
        load_model()

    out = pipe(
        audio,
        return_timestamps=True,
    )
    print(f"{out=}")

    transcript = out["text"]
    return transcript
|