File size: 1,450 Bytes
96d549d
ab0e749
96d549d
ab0e749
2b16bc4
ab0e749
91251fa
 
 
 
23d99fb
91251fa
 
ab0e749
 
91251fa
 
 
ab0e749
 
 
 
91251fa
ab0e749
91251fa
ab0e749
91251fa
ab0e749
91251fa
 
 
 
 
 
 
 
 
 
 
 
 
 
ab0e749
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import io
import base64
from faster_whisper import WhisperModel
import logging

# Configure the root logger at DEBUG level for the whole process.
# NOTE(review): DEBUG is very verbose for a production inference endpoint,
# and basicConfig at import time affects every library's logging — confirm
# this is intentional.
logging.basicConfig(level=logging.DEBUG)


class EndpointHandler:
    """Inference endpoint wrapping a faster-whisper speech-to-text model.

    Accepts a payload containing base64-encoded audio under the "inputs"
    key and returns a list of transcribed segments with timestamps.
    """

    def __init__(self, path: str = ""):
        """Load the Whisper model.

        Args:
            path: model path supplied by the hosting framework; unused here
                because the model is always loaded by name.
        """
        # NOTE(review): num_workers=30 assumes a machine with ample CPU
        # cores — confirm against the deployment hardware.
        self.model = WhisperModel("large-v3", num_workers=30)

    def __call__(self, data: dict[str, str]) -> list[dict]:
        """Transcribe base64-encoded audio from the request payload.

        Args:
            data: request payload. Recognized keys (all popped from `data`):
                - "inputs": base64-encoded audio bytes (required).
                - "language": language code passed to the model (default "de").
                - "task": "transcribe" or "translate" (default "transcribe").

        Returns:
            A list of dicts, one per segment:
            ``{"segmentId": int, "text": str,
               "timestamps": {"start": float, "end": float}}``.

        Raises:
            ValueError: if the payload has no "inputs" key.
        """
        # Fail fast with a clear message. The original fallback
        # (`data.pop("inputs", data)`) passed the whole dict to
        # base64.b64decode on a missing key, producing an opaque TypeError.
        if "inputs" not in data:
            raise ValueError('payload must contain an "inputs" key with base64-encoded audio')
        inputs = data.pop("inputs")
        language = data.pop("language", "de")
        task = data.pop("task", "transcribe")

        # Decode the base64 string into an in-memory file-like object.
        audio_bytes_decoded = base64.b64decode(inputs)
        logging.debug(f"Decoded Bytes Length: {len(audio_bytes_decoded)}")
        audio_bytes = io.BytesIO(audio_bytes_decoded)

        # Run inference; `segments` is consumed lazily in the loop below.
        logging.info("Running inference...")
        segments, info = self.model.transcribe(audio_bytes, language=language, task=task)

        # Collect per-segment results into the response payload.
        full_text = []
        for segment in segments:
            full_text.append({
                "segmentId": segment.id,
                "text": segment.text,
                "timestamps": {
                    "start": segment.start,
                    "end": segment.end,
                },
            })
            # Periodic progress log for long audio files.
            if segment.id % 100 == 0:
                logging.info("segment " + str(segment.id) + " transcribed")
        logging.info("Inference completed.")

        return full_text