File size: 1,977 Bytes
72277b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
from argparse import Namespace, ArgumentParser

class WhisperLiveKit:
    """Process-wide singleton holding the configuration and loaded engines.

    The first successful construction stores the configuration on ``args``
    and, depending on it, creates the ASR backend/tokenizer pair and the
    diarization engine.  Every later ``WhisperLiveKit(...)`` call returns that
    same object and leaves it untouched, whatever arguments are passed.
    """

    _instance = None
    _initialized = False

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, allocating it on the first call."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    @staticmethod
    def _default_options():
        """Return a fresh dict with the built-in default configuration."""
        return {
            "host": "localhost",
            "port": 8000,
            "warmup_file": None,
            "confidence_validation": False,
            "diarization": False,
            "transcription": True,
            "min_chunk_size": 0.5,
            "model": "base",
            "model_cache_dir": None,
            "model_dir": None,
            "lan": "auto",
            "task": "transcribe",
            "backend": "faster-whisper",
            "vac": False,
            "vac_chunk_size": 0.04,
            "vad": True,
            "buffer_trimming": "sentence",
            "buffer_trimming_sec": 1.0,
            "log_level": "INFO",
        }

    def __init__(self, args=None, **kwargs):
        """Set up the configuration and the engines it enables.

        Args:
            args: Configuration object exposing the options as attributes
                (typically an ``argparse.Namespace``).  When ``None`` the
                built-in defaults are used.
            **kwargs: Individual option overrides.  They are applied on top
                of the defaults when ``args`` is ``None``, or on top of a copy
                of ``args`` otherwise (the caller's object is never mutated).

        Once an instance has been fully initialised, further calls return
        immediately and their arguments are ignored.
        """
        if WhisperLiveKit._initialized:
            return

        if args is None:
            options = self._default_options()
            options.update(kwargs)
            args = Namespace(**options)
        elif kwargs:
            # Copy before overriding so the caller's namespace stays intact.
            options = dict(vars(args))
            options.update(kwargs)
            args = Namespace(**options)

        self.args = args

        self.asr = None
        self.tokenizer = None
        self.diarization = None

        if self.args.transcription:
            self.asr, self.tokenizer = backend_factory(self.args)
            warmup_asr(self.asr, self.args.warmup_file)

        if self.args.diarization:
            # Imported lazily: only needed when diarization is switched on.
            from diarization.diarization_online import DiartDiarization
            self.diarization = DiartDiarization()

        # Set last so that a failed set-up above can be retried by a later call.
        WhisperLiveKit._initialized = True

    def web_interface(self):
        """Return the contents of the packaged live-transcription HTML page.

        Returns:
            str: Text of ``web/live_transcription.html`` from the
            ``whisperlivekit`` package, decoded as UTF-8.
        """
        # NOTE(review): pkg_resources is deprecated by setuptools; consider
        # migrating to importlib.resources once the minimum Python allows it.
        import pkg_resources
        html_path = pkg_resources.resource_filename('whisperlivekit', 'web/live_transcription.html')
        with open(html_path, "r", encoding="utf-8") as f:
            html = f.read()
        return html