import base64
import io
import logging
from faster_whisper import WhisperModel
from flie_processor import process_video
# Configure root logging once at import time; DEBUG level also surfaces the
# full response payload dumped in EndpointHandler.__call__.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
class EndpointHandler:
    """Inference-endpoint handler: transcribes audio with faster-whisper,
    optionally extracting slides from a video link first.
    """

    def __init__(self, path=""):
        # `path` is part of the endpoint-handler contract; the model name is
        # fixed here rather than loaded from `path`.
        self.model = WhisperModel("large-v2", num_workers=30)

    def __call__(self, data: dict[str, str]) -> dict:
        """Handle one inference request.

        Recognized keys in ``data`` (consumed via ``pop``):
            inputs:   base64-encoded audio bytes (required unless type == "link")
            link:     video URL (required when type == "link")
            language: transcription language code (default "de")
            task:     whisper task, e.g. "transcribe" (default "transcribe")
            type:     "audio" (default) or "link"

        Returns:
            dict with "audios" (list of segment dicts with id, text and
            start/end timestamps) and, for link requests, "slides".

        Raises:
            ValueError: if the key required by the chosen type is missing.
        """
        # Pop with defaults: a "link" request carries no "inputs" and an
        # audio request carries no "link" — unconditional pops would raise
        # KeyError for whichever key is absent.
        inputs = data.pop("inputs", None)
        link = data.pop("link", None)
        language = data.pop("language", "de")
        task = data.pop("task", "transcribe")
        processing_type = data.pop("type", "audio")

        response = {}
        if processing_type == "link":
            if link is None:
                raise ValueError('type == "link" requires a "link" field')
            slides, audio_bytes = process_video(link)
            response["slides"] = [slide.to_dict() for slide in slides]
        else:
            if inputs is None:
                raise ValueError('audio requests require an "inputs" field')
            audio_bytes_decoded = base64.b64decode(inputs)
            # Lazy %-args: skip formatting entirely when DEBUG is disabled.
            logging.debug("Decoded Bytes Length: %s", len(audio_bytes_decoded))
            audio_bytes = io.BytesIO(audio_bytes_decoded)

        # run inference pipeline
        logging.info("Running inference...")
        segments, info = self.model.transcribe(audio_bytes, language=language, task=task)

        full_text = []
        for segment in segments:
            full_text.append({
                "segmentId": segment.id,
                "text": segment.text,
                "timestamps": {
                    "start": segment.start,
                    "end": segment.end,
                },
            })
            # Periodic progress marker for long transcriptions.
            if segment.id % 100 == 0:
                logging.info("segment %s transcribed", segment.id)

        logging.info("Inference completed.")
        response["audios"] = full_text
        logging.debug(response)
        return response