dvislobokov committed on
Commit
b5bf292
·
verified ·
1 Parent(s): 56a31c1

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +45 -1
README.md CHANGED
@@ -7,4 +7,48 @@ language:
7
  base_model:
8
  - dvislobokov/whisper-large-v3-turbo-russian
9
  pipeline_tag: automatic-speech-recognition
10
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  base_model:
8
  - dvislobokov/whisper-large-v3-turbo-russian
9
  pipeline_tag: automatic-speech-recognition
10
+ ---
11
+
12
+ ## Example of using this model with faster-whisper
13
+
14
+ ```python
15
+ import io
16
+ import json
17
+ import logging
18
+ import sys
19
+ import time
20
+ from datetime import datetime
21
+ from faster_whisper import WhisperModel
22
+ from pydub import AudioSegment
23
+
24
+ logging.basicConfig(
25
+ level=logging.INFO,
26
+ format='%(asctime)s - %(levelname)s - %(message)s',
27
+ handlers=[
28
+ logging.FileHandler('faster-whisper.log'),
29
+ logging.StreamHandler(sys.stdout)
30
+ ]
31
+ )
32
+ model = WhisperModel("/path/to/dvislobokov/faster-whisper-large-v3-turbo-russian", "cpu")
33
+
34
+ audio = AudioSegment.from_wav("ezyZip.wav")
35
+ chunk_length = 30 * 1000 # in milliseconds
36
+ chunks = [audio[i:i + chunk_length] for i in range(0, len(audio), chunk_length)]
37
+
38
+
39
+ logging.info(f'Start transcribe at {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
40
+ start = time.time()
41
+
42
+ text = []
43
+ for i, chunk in enumerate(chunks):
44
+ buffer = io.BytesIO()
45
+ chunk.export(buffer, format="wav")
46
+ segments, info = model.transcribe(buffer, language="ru")
47
+ text.append("".join(segment.text for segment in segments))
48
+ end = time.time()
49
+ logging.info(f'Finish transcribe at {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}')
50
+ logging.info(f'Total time: {end - start}')
51
+ logging.info(f'Text: {text}')
52
+ ```
53
+
54
+