# Hugging Face Inference Endpoints custom handler: base64 audio in, transcript segments out.
import io
import base64
from faster_whisper import WhisperModel
import logging
# Root logger at DEBUG so the per-request decoded-byte-length message below is emitted.
logging.basicConfig(level=logging.DEBUG)
class EndpointHandler:
def __init__(self, path=""):
self.model = WhisperModel("large-v3", num_workers=30)
def __call__(self, data: dict[str, str]):
# process inputs
inputs = data.pop("inputs", data)
language = data.pop("language", "de")
task = data.pop("task", "transcribe")
# Decode base64 string to bytes
audio_bytes_decoded = base64.b64decode(inputs)
logging.debug(f"Decoded Bytes Length: {len(audio_bytes_decoded)}")
audio_bytes = io.BytesIO(audio_bytes_decoded)
# run inference pipeline
logging.info("Running inference...")
segments, info = self.model.transcribe(audio_bytes, language=language, task=task)
# postprocess the prediction
full_text = []
for segment in segments:
full_text.append({"segmentId": segment.id,
"text": segment.text,
"timestamps": {
"start": segment.start,
"end": segment.end
}
})
if segment.id % 100 == 0:
logging.info("segment " + str(segment.id) + " transcribed")
logging.info("Inference completed.")
return full_text