File size: 2,553 Bytes
72277b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15421e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72277b5
15421e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763a8af
15421e7
72277b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
from whisper_streaming_custom.whisper_online import backend_factory, warmup_asr
from argparse import Namespace, ArgumentParser

class WhisperLiveKit:
    """Process-wide singleton holding the configuration and loaded backends.

    The first construction builds the configuration and, depending on it,
    creates the ASR backend and/or the diarization object. Every later
    construction returns that same instance and leaves it untouched, so
    configuration passed after the first call is ignored.

    Attributes set by ``__init__``:
        args: ``argparse.Namespace`` with the merged configuration.
        asr, tokenizer: the pair returned by ``backend_factory(args)``, or
            ``None`` when ``args.transcription`` is falsy.
        diarization: a ``DiartDiarization`` instance, or ``None`` when
            ``args.diarization`` is falsy.
    """

    # Shared instance and a flag telling whether it has completed __init__.
    _instance = None
    _initialized = False

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating it on first use."""
        if cls._instance is None:
            cls._instance = super().__new__(cls)
        return cls._instance

    def __init__(self, args=None, **kwargs):
        """Build the configuration and create the enabled components.

        Args:
            args: optional ``argparse.Namespace`` or mapping of option names
                to values; entries override the built-in defaults.
            **kwargs: individual option overrides. They take precedence over
                both the defaults and ``args``.

        Raises:
            TypeError: if ``args`` is neither ``None``, a ``Namespace`` nor a
                mapping with string keys.
        """
        # Python calls __init__ on every WhisperLiveKit(...) expression, even
        # though __new__ hands back the existing object; run setup only once.
        if WhisperLiveKit._initialized:
            return

        defaults = {
            "host": "localhost",
            "port": 8000,
            "warmup_file": None,
            "confidence_validation": False,
            "diarization": False,
            "transcription": True,
            "min_chunk_size": 0.5,
            "model": "base",
            "model_cache_dir": None,
            "model_dir": None,
            "lan": "en",
            "task": "transcribe",
            "backend": "faster-whisper",
            "vac": False,
            "vac_chunk_size": 0.04,
            "vad": True,
            "buffer_trimming": "sentence",
            "buffer_trimming_sec": 1.0,
            "log_level": "INFO",
        }

        if args is None:
            provided = {}
        elif isinstance(args, Namespace):
            provided = vars(args)
        else:
            provided = args

        # Later mappings win: defaults < args < keyword overrides.
        self.args = Namespace(**{**defaults, **provided, **kwargs})

        print(self.args)

        self.asr = None
        self.tokenizer = None
        self.diarization = None

        if self.args.transcription:
            self.asr, self.tokenizer = backend_factory(self.args)
            warmup_asr(self.asr, self.args.warmup_file)

        if self.args.diarization:
            # Imported lazily so the diarization module is only required
            # when the feature is switched on.
            from diarization.diarization_online import DiartDiarization
            self.diarization = DiartDiarization()

        # Set last: if anything above raised, the next construction retries.
        WhisperLiveKit._initialized = True

    def web_interface(self):
        """Return the packaged ``web/live_transcription.html`` page as text.

        The file is looked up inside the ``whisperlivekit`` package and
        decoded as UTF-8.
        """
        try:
            from importlib.resources import files
        except ImportError:
            # Interpreter without importlib.resources.files: fall back to
            # the deprecated pkg_resources lookup.
            import pkg_resources
            html_path = pkg_resources.resource_filename('whisperlivekit', 'web/live_transcription.html')
            with open(html_path, "r", encoding="utf-8") as f:
                return f.read()

        page = files('whisperlivekit') / 'web' / 'live_transcription.html'
        return page.read_text(encoding="utf-8")