File size: 737 Bytes
ebc3d1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
from pprint import pprint
from time import perf_counter
from time import time

from faster_whisper import WhisperModel

# Audio files (paths relative to the working directory) timed by this script.
test_audio = [
    "kotoba-whisper-eval/audio/long_interview_1.wav",
    "kotoba-whisper-eval/audio/manzai1.wav",
    "kotoba-whisper-eval/audio/manzai2.wav",
    "kotoba-whisper-eval/audio/manzai3.wav",
]


def benchmark_transcription(model, audio_files, language="ja", chunk_length=15):
    """Transcribe each audio file, print its segments, and time the work.

    Args:
        model: Object exposing ``transcribe(path, **options)`` that returns a
            ``(segments, info)`` pair, where each segment has ``start``,
            ``end`` and ``text`` attributes (e.g. a faster-whisper
            ``WhisperModel``).
        audio_files: Iterable of audio file paths to transcribe.
        language: Language code forwarded to ``model.transcribe``.
        chunk_length: Chunk length forwarded to ``model.transcribe``.

    Returns:
        dict mapping each audio path to the elapsed seconds (float) spent
        transcribing it and printing its segments.
    """
    timings = {}
    for audio_file in audio_files:
        # perf_counter() is monotonic, so the measured interval cannot be
        # skewed by system clock adjustments the way time() can.
        started = perf_counter()
        segments, _info = model.transcribe(
            audio_file,
            language=language,
            chunk_length=chunk_length,
            condition_on_previous_text=False,
        )
        # The loop stays inside the timed region: the faster-whisper README
        # describes `segments` as a generator, so decoding is expected to
        # happen while iterating rather than inside transcribe() itself.
        for segment in segments:
            print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
        timings[audio_file] = perf_counter() - started
    return timings


if __name__ == "__main__":
    # Load the model only when executed as a script so that importing this
    # module does not trigger a model download/load or a benchmark run.
    model = WhisperModel("kotoba-tech/kotoba-whisper-v1.0-faster")
    result = benchmark_transcription(model, test_audio)
    pprint(result)