File size: 827 Bytes
853bb53
828ccc6
853bb53
 
 
828ccc6
cfed61f
853bb53
 
 
 
cfed61f
853bb53
 
 
 
 
 
 
f0f40a8
853bb53
41dd0b3
853bb53
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
from typing import Dict, Any, List
from transformers import pipeline
import torch
from transformers.pipelines.audio_utils import ffmpeg_read

# NOTE: audio decoding below goes through ffmpeg_read (ffmpeg-based decoding).

# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")


class EndpointHandler:
    """Callable handler that transcribes an encoded audio payload.

    The handler wraps a ``transformers`` automatic-speech-recognition
    pipeline. The decoder prompt is fixed to ``LANGUAGE`` / ``TASK`` once at
    construction time and reused for every request.
    """

    # Sampling rate (Hz) requested from ffmpeg_read when decoding the payload.
    SAMPLING_RATE = 16000
    # Language and task used to build the forced decoder prompt.
    LANGUAGE = "ko"
    TASK = "transcribe"

    def __init__(self, path=""):
        """Build the ASR pipeline and configure its decoder prompt.

        Args:
            path: Model identifier or directory forwarded to
                ``pipeline(model=...)``.
        """
        self.pipe = pipeline(task='automatic-speech-recognition', model=path, device=device)
        # The prompt ids do not depend on the request, so compute them once
        # here instead of on every call.
        # NOTE(review): relies on the tokenizer exposing
        # get_decoder_prompt_ids (presumably a Whisper-style model) - confirm.
        self.pipe.model.config.forced_decoder_ids = (
            self.pipe.tokenizer.get_decoder_prompt_ids(
                language=self.LANGUAGE, task=self.TASK
            )
        )

    def __call__(self, data: Any) -> Dict[str, Any]:
        """Decode the audio payload and return the pipeline's transcription.

        Args:
            data: Either a mapping whose ``"inputs"`` entry holds the encoded
                audio bytes, or the encoded audio bytes themselves. The
                mapping is not modified.

        Returns:
            The value returned by the pipeline for the decoded audio array
            (for a single input the ASR pipeline is documented to return a
            dict containing ``"text"``).

        Raises:
            TypeError: If no bytes-like audio payload can be found in *data*.
        """
        # Read without pop() so the caller's request object stays intact.
        payload = data.get("inputs", data) if hasattr(data, "get") else data
        if not isinstance(payload, (bytes, bytearray, memoryview)):
            raise TypeError(
                "expected encoded audio bytes under 'inputs', got "
                f"{type(payload).__name__}"
            )

        audio_array = ffmpeg_read(payload, self.SAMPLING_RATE)
        return self.pipe(audio_array)