ManBib committed on
Commit
91251fa
·
1 Parent(s): 8cfbecf

initial commit

Browse files
Files changed (7) hide show
  1. .idea/.gitignore +8 -0
  2. README.md +140 -0
  3. config.json +291 -0
  4. handler.py +43 -0
  5. requirements.txt +0 -0
  6. tokenizer.json +0 -0
  7. vocabulary.txt +0 -0
.idea/.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # Default ignored files
2
+ /shelf/
3
+ /workspace.xml
4
+ # Editor-based HTTP Client requests
5
+ /httpRequests/
6
+ # Datasource local storage ignored files
7
+ /dataSources/
8
+ /dataSources.local.xml
README.md ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ - zh
5
+ - de
6
+ - es
7
+ - ru
8
+ - ko
9
+ - fr
10
+ - ja
11
+ - pt
12
+ - tr
13
+ - pl
14
+ - ca
15
+ - nl
16
+ - ar
17
+ - sv
18
+ - it
19
+ - id
20
+ - hi
21
+ - fi
22
+ - vi
23
+ - he
24
+ - uk
25
+ - el
26
+ - ms
27
+ - cs
28
+ - ro
29
+ - da
30
+ - hu
31
+ - ta
32
+ - 'no'
33
+ - th
34
+ - ur
35
+ - hr
36
+ - bg
37
+ - lt
38
+ - la
39
+ - mi
40
+ - ml
41
+ - cy
42
+ - sk
43
+ - te
44
+ - fa
45
+ - lv
46
+ - bn
47
+ - sr
48
+ - az
49
+ - sl
50
+ - kn
51
+ - et
52
+ - mk
53
+ - br
54
+ - eu
55
+ - is
56
+ - hy
57
+ - ne
58
+ - mn
59
+ - bs
60
+ - kk
61
+ - sq
62
+ - sw
63
+ - gl
64
+ - mr
65
+ - pa
66
+ - si
67
+ - km
68
+ - sn
69
+ - yo
70
+ - so
71
+ - af
72
+ - oc
73
+ - ka
74
+ - be
75
+ - tg
76
+ - sd
77
+ - gu
78
+ - am
79
+ - yi
80
+ - lo
81
+ - uz
82
+ - fo
83
+ - ht
84
+ - ps
85
+ - tk
86
+ - nn
87
+ - mt
88
+ - sa
89
+ - lb
90
+ - my
91
+ - bo
92
+ - tl
93
+ - mg
94
+ - as
95
+ - tt
96
+ - haw
97
+ - ln
98
+ - ha
99
+ - ba
100
+ - jw
101
+ - su
102
+ tags:
103
+ - audio
104
+ - automatic-speech-recognition
105
+ license: mit
106
+ library_name: ctranslate2
107
+ ---
108
+
109
+ # Whisper large-v2 model for CTranslate2
110
+
111
+ This repository contains the conversion of [openai/whisper-large-v2](https://huggingface.co/openai/whisper-large-v2) to the [CTranslate2](https://github.com/OpenNMT/CTranslate2) model format.
112
+
113
+ This model can be used in CTranslate2 or projects based on CTranslate2 such as [faster-whisper](https://github.com/guillaumekln/faster-whisper).
114
+
115
+ ## Example
116
+
117
+ ```python
118
+ from faster_whisper import WhisperModel
119
+
120
+ model = WhisperModel("large-v2")
121
+
122
+ segments, info = model.transcribe("audio.mp3")
123
+ for segment in segments:
124
+ print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
125
+ ```
126
+
127
+ ## Conversion details
128
+
129
+ The original model was converted with the following command:
130
+
131
+ ```
132
+ ct2-transformers-converter --model openai/whisper-large-v2 --output_dir faster-whisper-large-v2 \
133
+ --copy_files tokenizer.json --quantization float16
134
+ ```
135
+
136
+ Note that the model weights are saved in FP16. This type can be changed when the model is loaded using the [`compute_type` option in CTranslate2](https://opennmt.net/CTranslate2/quantization.html).
137
+
138
+ ## More information
139
+
140
+ **For more information about the original model, see its [model card](https://huggingface.co/openai/whisper-large-v2).**
config.json ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 10,
5
+ 12
6
+ ],
7
+ [
8
+ 13,
9
+ 17
10
+ ],
11
+ [
12
+ 16,
13
+ 11
14
+ ],
15
+ [
16
+ 16,
17
+ 12
18
+ ],
19
+ [
20
+ 16,
21
+ 13
22
+ ],
23
+ [
24
+ 17,
25
+ 15
26
+ ],
27
+ [
28
+ 17,
29
+ 16
30
+ ],
31
+ [
32
+ 18,
33
+ 4
34
+ ],
35
+ [
36
+ 18,
37
+ 11
38
+ ],
39
+ [
40
+ 18,
41
+ 19
42
+ ],
43
+ [
44
+ 19,
45
+ 11
46
+ ],
47
+ [
48
+ 21,
49
+ 2
50
+ ],
51
+ [
52
+ 21,
53
+ 3
54
+ ],
55
+ [
56
+ 22,
57
+ 3
58
+ ],
59
+ [
60
+ 22,
61
+ 9
62
+ ],
63
+ [
64
+ 22,
65
+ 12
66
+ ],
67
+ [
68
+ 23,
69
+ 5
70
+ ],
71
+ [
72
+ 23,
73
+ 7
74
+ ],
75
+ [
76
+ 23,
77
+ 13
78
+ ],
79
+ [
80
+ 25,
81
+ 5
82
+ ],
83
+ [
84
+ 26,
85
+ 1
86
+ ],
87
+ [
88
+ 26,
89
+ 12
90
+ ],
91
+ [
92
+ 27,
93
+ 15
94
+ ]
95
+ ],
96
+ "lang_ids": [
97
+ 50259,
98
+ 50260,
99
+ 50261,
100
+ 50262,
101
+ 50263,
102
+ 50264,
103
+ 50265,
104
+ 50266,
105
+ 50267,
106
+ 50268,
107
+ 50269,
108
+ 50270,
109
+ 50271,
110
+ 50272,
111
+ 50273,
112
+ 50274,
113
+ 50275,
114
+ 50276,
115
+ 50277,
116
+ 50278,
117
+ 50279,
118
+ 50280,
119
+ 50281,
120
+ 50282,
121
+ 50283,
122
+ 50284,
123
+ 50285,
124
+ 50286,
125
+ 50287,
126
+ 50288,
127
+ 50289,
128
+ 50290,
129
+ 50291,
130
+ 50292,
131
+ 50293,
132
+ 50294,
133
+ 50295,
134
+ 50296,
135
+ 50297,
136
+ 50298,
137
+ 50299,
138
+ 50300,
139
+ 50301,
140
+ 50302,
141
+ 50303,
142
+ 50304,
143
+ 50305,
144
+ 50306,
145
+ 50307,
146
+ 50308,
147
+ 50309,
148
+ 50310,
149
+ 50311,
150
+ 50312,
151
+ 50313,
152
+ 50314,
153
+ 50315,
154
+ 50316,
155
+ 50317,
156
+ 50318,
157
+ 50319,
158
+ 50320,
159
+ 50321,
160
+ 50322,
161
+ 50323,
162
+ 50324,
163
+ 50325,
164
+ 50326,
165
+ 50327,
166
+ 50328,
167
+ 50329,
168
+ 50330,
169
+ 50331,
170
+ 50332,
171
+ 50333,
172
+ 50334,
173
+ 50335,
174
+ 50336,
175
+ 50337,
176
+ 50338,
177
+ 50339,
178
+ 50340,
179
+ 50341,
180
+ 50342,
181
+ 50343,
182
+ 50344,
183
+ 50345,
184
+ 50346,
185
+ 50347,
186
+ 50348,
187
+ 50349,
188
+ 50350,
189
+ 50351,
190
+ 50352,
191
+ 50353,
192
+ 50354,
193
+ 50355,
194
+ 50356,
195
+ 50357
196
+ ],
197
+ "suppress_ids": [
198
+ 1,
199
+ 2,
200
+ 7,
201
+ 8,
202
+ 9,
203
+ 10,
204
+ 14,
205
+ 25,
206
+ 26,
207
+ 27,
208
+ 28,
209
+ 29,
210
+ 31,
211
+ 58,
212
+ 59,
213
+ 60,
214
+ 61,
215
+ 62,
216
+ 63,
217
+ 90,
218
+ 91,
219
+ 92,
220
+ 93,
221
+ 359,
222
+ 503,
223
+ 522,
224
+ 542,
225
+ 873,
226
+ 893,
227
+ 902,
228
+ 918,
229
+ 922,
230
+ 931,
231
+ 1350,
232
+ 1853,
233
+ 1982,
234
+ 2460,
235
+ 2627,
236
+ 3246,
237
+ 3253,
238
+ 3268,
239
+ 3536,
240
+ 3846,
241
+ 3961,
242
+ 4183,
243
+ 4667,
244
+ 6585,
245
+ 6647,
246
+ 7273,
247
+ 9061,
248
+ 9383,
249
+ 10428,
250
+ 10929,
251
+ 11938,
252
+ 12033,
253
+ 12331,
254
+ 12562,
255
+ 13793,
256
+ 14157,
257
+ 14635,
258
+ 15265,
259
+ 15618,
260
+ 16553,
261
+ 16604,
262
+ 18362,
263
+ 18956,
264
+ 20075,
265
+ 21675,
266
+ 22520,
267
+ 26130,
268
+ 26161,
269
+ 26435,
270
+ 28279,
271
+ 29464,
272
+ 31650,
273
+ 32302,
274
+ 32470,
275
+ 36865,
276
+ 42863,
277
+ 47425,
278
+ 49870,
279
+ 50254,
280
+ 50258,
281
+ 50358,
282
+ 50359,
283
+ 50360,
284
+ 50361,
285
+ 50362
286
+ ],
287
+ "suppress_ids_begin": [
288
+ 220,
289
+ 50257
290
+ ]
291
+ }
handler.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import io
2
+ import base64
3
+ from faster_whisper import WhisperModel
4
+ import logging
5
+
6
+ logging.basicConfig(level=logging.DEBUG)
7
+
8
+
9
class EndpointHandler:
    """Hugging Face Inference Endpoint handler for a faster-whisper
    (CTranslate2) Whisper large-v2 model.

    Accepts a JSON payload whose ``inputs`` field is a base64-encoded audio
    file and returns the transcription as a list of timestamped segments.
    """

    def __init__(self, path=""):
        # `path` is part of the Hugging Face handler interface; the model
        # name is resolved by faster-whisper itself, so `path` is unused here.
        # NOTE(review): num_workers=30 assumes the endpoint hardware can
        # sustain that many parallel transcriptions — confirm against the
        # deployed instance type.
        self.model = WhisperModel("large-v2", num_workers=30)

    def __call__(self, data: dict[str, str]) -> list[dict]:
        """Transcribe one request's audio and return timestamped segments.

        Args:
            data: Request payload. Required key ``inputs`` holds the audio
                file as a base64 string. Optional keys: ``language``
                (default ``"de"``) and ``task`` (default ``"transcribe"``).
                The consumed keys are popped from ``data``.

        Returns:
            One dict per segment:
            ``{"segmentId": int, "text": str,
               "timestamps": {"start": float, "end": float}}``.

        Raises:
            ValueError: If ``inputs`` is missing or is not valid base64.
        """
        # process inputs (popped, matching the original handler's contract)
        inputs = data.pop("inputs", None)
        language = data.pop("language", "de")
        task = data.pop("task", "transcribe")

        if not isinstance(inputs, (str, bytes)):
            # Previously a missing "inputs" key fell back to the payload dict
            # itself and crashed inside b64decode with an opaque TypeError;
            # fail fast with a clear message instead.
            raise ValueError("request payload must contain a base64 'inputs' field")

        try:
            audio_bytes_decoded = base64.b64decode(inputs)
        except ValueError as err:  # binascii.Error is a ValueError subclass
            raise ValueError("'inputs' is not valid base64 audio data") from err

        # Lazy %-formatting so the args are only rendered if DEBUG is enabled.
        logging.debug("Decoded Bytes Length: %d", len(audio_bytes_decoded))
        audio_bytes = io.BytesIO(audio_bytes_decoded)

        # run inference pipeline
        logging.info("Running inference...")
        segments, info = self.model.transcribe(audio_bytes, language=language, task=task)

        # postprocess the prediction
        # NOTE(review): faster-whisper appears to yield segments lazily, so
        # this loop is what actually drives decoding — verify before relying
        # on the heartbeat timing below.
        full_text = []
        for segment in segments:
            full_text.append({
                "segmentId": segment.id,
                "text": segment.text,
                "timestamps": {
                    "start": segment.start,
                    "end": segment.end,
                },
            })

            # Periodic progress heartbeat for long audio files.
            if segment.id % 100 == 0:
                logging.info("segment %s transcribed", segment.id)
        logging.info("Inference completed.")

        return full_text
requirements.txt ADDED
Binary file (104 Bytes). View file
 
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
vocabulary.txt ADDED
The diff for this file is too large to render. See raw diff