faster-whisper-readme / handler.py
ManBib's picture
reset to only audio processing
ab0e749
import io
import base64
from faster_whisper import WhisperModel
import logging
# Configure the root logger at DEBUG level so that the debug/info messages
# emitted by the handler below (via the module-level ``logging`` functions)
# are actually written out.
logging.basicConfig(level=logging.DEBUG)
class EndpointHandler:
    """Callable handler that transcribes base64-encoded audio.

    The handler wraps a faster-whisper ``WhisperModel``. Calling the instance
    with a request payload returns one dictionary per transcribed segment.
    """

    def __init__(self, path="", model_size="large-v3", num_workers=30):
        """Load the Whisper model.

        Args:
            path: Accepted so the handler can be constructed with a path
                argument; it is not used by this implementation.
            model_size: Model identifier passed to ``WhisperModel``.
            num_workers: Worker count passed to ``WhisperModel``.
        """
        self.model = WhisperModel(model_size, num_workers=num_workers)

    def __call__(self, data: dict[str, str]) -> list[dict]:
        """Transcribe the audio contained in a request payload.

        Args:
            data: Request payload. ``"inputs"`` holds the base64-encoded
                audio (required); ``"language"`` defaults to ``"de"`` and
                ``"task"`` defaults to ``"transcribe"``. The payload is
                read, never modified.

        Returns:
            A list with one dict per segment, each of the form
            ``{"segmentId", "text", "timestamps": {"start", "end"}}``.

        Raises:
            TypeError: If the payload has no ``"inputs"`` key, or if its
                value is not something ``base64.b64decode`` accepts.
        """
        # Without this check a missing key only surfaced as an opaque
        # TypeError from base64.b64decode being handed the whole payload.
        # TypeError is kept so callers catching it keep working.
        if "inputs" not in data:
            raise TypeError(
                'request payload has no "inputs" key holding the '
                "base64-encoded audio"
            )

        # Read with get() rather than pop() so the caller's dict is left
        # intact and can be reused for another request.
        inputs = data["inputs"]
        language = data.get("language", "de")
        task = data.get("task", "transcribe")

        # Decode the base64 string into an in-memory binary stream.
        audio_bytes_decoded = base64.b64decode(inputs)
        logging.debug("Decoded Bytes Length: %d", len(audio_bytes_decoded))
        audio_bytes = io.BytesIO(audio_bytes_decoded)

        logging.info("Running inference...")
        segments, _info = self.model.transcribe(
            audio_bytes, language=language, task=task
        )

        # Kept as an explicit loop (not a comprehension) because of the
        # progress logging; per the faster-whisper documentation the
        # segments are produced lazily while iterating.
        full_text = []
        for segment in segments:
            full_text.append(
                {
                    "segmentId": segment.id,
                    "text": segment.text,
                    "timestamps": {
                        "start": segment.start,
                        "end": segment.end,
                    },
                }
            )
            # Progress message every 100 segments.
            if segment.id % 100 == 0:
                logging.info("segment %s transcribed", segment.id)

        logging.info("Inference completed.")
        return full_text