|
import io |
|
import base64 |
|
from faster_whisper import WhisperModel |
|
import logging |
|
|
|
# Configure root logging at import time so endpoint logs are captured.
# NOTE(review): DEBUG is very verbose for production — consider INFO; confirm with ops.
logging.basicConfig(level=logging.DEBUG)
|
|
|
|
|
class EndpointHandler:
    """Inference Endpoints handler that transcribes base64-encoded audio
    with a faster-whisper model and returns per-segment text with timestamps."""

    def __init__(self, path: str = "", model_name: str = "large-v3", num_workers: int = 30):
        """Load the Whisper model once at endpoint startup.

        Args:
            path: Model directory supplied by the endpoint runtime (unused here;
                kept for interface compatibility with the handler contract).
            model_name: faster-whisper model identifier to load.
            num_workers: Number of parallel transcription workers.
        """
        self.model = WhisperModel(model_name, num_workers=num_workers)

    def __call__(self, data: dict[str, str]) -> list[dict]:
        """Transcribe a single request payload.

        Args:
            data: Request payload. Keys (popped, i.e. the dict is mutated):
                "inputs": base64-encoded audio bytes — required; if absent the
                    whole payload is used as the b64 input (legacy fallback),
                "language": language code for transcription (default "de"),
                "task": "transcribe" or "translate" (default "transcribe").

        Returns:
            List of segment dicts shaped
            ``{"segmentId", "text", "timestamps": {"start", "end"}}``.

        Raises:
            binascii.Error: if "inputs" is not valid base64.
        """
        inputs = data.pop("inputs", data)
        language = data.pop("language", "de")
        task = data.pop("task", "transcribe")

        audio_bytes_decoded = base64.b64decode(inputs)
        # Lazy %-style args avoid paying the formatting cost when DEBUG is off.
        logging.debug("Decoded Bytes Length: %d", len(audio_bytes_decoded))
        audio_stream = io.BytesIO(audio_bytes_decoded)

        logging.info("Running inference...")
        segments, _info = self.model.transcribe(audio_stream, language=language, task=task)

        full_text = []
        for segment in segments:
            full_text.append({
                "segmentId": segment.id,
                "text": segment.text,
                "timestamps": {
                    "start": segment.start,
                    "end": segment.end,
                },
            })
            # `segments` is consumed lazily, so this logs real transcription progress.
            if segment.id % 100 == 0:
                logging.info("segment %s transcribed", segment.id)
        logging.info("Inference completed.")

        return full_text
|
|