asahi417 committed on
Commit
1482df5
·
verified ·
1 Parent(s): 5417738

Create benchmark.sh

Browse files
Files changed (1) hide show
  1. benchmark.sh +23 -0
benchmark.sh ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# benchmark.sh
#
# Measures wall-clock transcription time of a faster-whisper model on the
# audio clips of the kotoba-whisper-eval dataset.
#
# Steps:
#   1. clone the evaluation dataset (skipped when it is already present)
#   2. convert every clip to 16 kHz mono 16-bit PCM WAV
#   3. load the model once so that it is cached before timing starts
#   4. transcribe each clip and record the elapsed seconds in TIME_*
#
# Requires bash (uses SECONDS, arrays and [[ ]]), git, ffmpeg, and a python
# interpreter with the faster_whisper package installed.
set -euo pipefail

readonly DATASET_URL="https://huggingface.co/datasets/kotoba-tech/kotoba-whisper-eval"
readonly DATASET_DIR="kotoba-whisper-eval"
readonly AUDIO_DIR="${DATASET_DIR}/audio"
readonly MODEL_ID="kotoba-tech/kotoba-whisper-v1.0-faster"
readonly -a CLIPS=(long_interview_1 manzai1 manzai2 manzai3)

#######################################
# Transcribe one audio file with the benchmarked model.
# Globals:   MODEL_ID (read)
# Arguments: $1 - path to the audio file
#            $2 - "1" to print every segment, anything else to stay quiet
#                 (optional, defaults to "0")
# Outputs:   segment lines on stdout when $2 is "1"
# Returns:   exit status of the python interpreter
#######################################
transcribe() {
  local audio_path=$1
  local print_segments=${2:-0}
  # The program is fed through a quoted here-doc: a compound statement such as
  # `for` cannot follow `;` in a `python -c` one-liner, and the quoted
  # delimiter keeps the shell from expanding anything inside the Python code.
  python - "$MODEL_ID" "$audio_path" "$print_segments" <<'PY'
import sys

from faster_whisper import WhisperModel

model_id, audio_path, print_segments = sys.argv[1], sys.argv[2], sys.argv[3] == "1"
model = WhisperModel(model_id)
# transcribe() returns a (segments, info) pair, and `segments` is a lazy
# generator: the audio is only decoded while it is being iterated, so the loop
# below must run even when nothing is printed.
segments, _info = model.transcribe(
    audio_path,
    language="ja",
    chunk_length=15,
    condition_on_previous_text=False,
)
for segment in segments:
    if print_segments:
        print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
PY
}

# clone dataset (git clone refuses to write into an existing directory, so
# skip it on re-runs)
if [[ ! -d "$DATASET_DIR" ]]; then
  git clone "$DATASET_URL"
fi

# convert to 16khz; -y overwrites WAV files left over from an earlier run
# instead of stopping at an interactive prompt
for clip in "${CLIPS[@]}"; do
  ffmpeg -y -i "${AUDIO_DIR}/${clip}.mp3" -ar 16000 -ac 1 -c:a pcm_s16le "${AUDIO_DIR}/${clip}.wav"
done

# cache the model so that the download is not part of any measurement
python -c 'import sys; from faster_whisper import WhisperModel; WhisperModel(sys.argv[1])' "$MODEL_ID"

# Each measurement resets bash's SECONDS counter, runs one transcription and
# stores the elapsed whole seconds. The figure includes interpreter start-up
# and model loading, because every run starts a fresh python process.
SECONDS=0
transcribe "${AUDIO_DIR}/long_interview_1.wav" 1
TIME_INTERVIEW=$SECONDS

SECONDS=0
transcribe "${AUDIO_DIR}/manzai1.wav"
TIME_MANZAI1=$SECONDS

SECONDS=0
transcribe "${AUDIO_DIR}/manzai2.wav"
TIME_MANZAI2=$SECONDS

SECONDS=0
transcribe "${AUDIO_DIR}/manzai3.wav"
TIME_MANZAI3=$SECONDS

# report the measurements
printf '%s: %ss\n' \
  "long_interview_1" "$TIME_INTERVIEW" \
  "manzai1" "$TIME_MANZAI1" \
  "manzai2" "$TIME_MANZAI2" \
  "manzai3" "$TIME_MANZAI3"