app.py
CHANGED
```diff
@@ -157,7 +157,6 @@ def save_transcription(transcription):
         f.write(transcription)
     return file_path
 
-
 def get_model_options(pipeline_type):
     if pipeline_type == "faster-batched":
         return ["cstr/whisper-large-v3-turbo-int8_float32", "deepdml/faster-whisper-large-v3-turbo-ct2", "Systran/faster-whisper-large-v3", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
```
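The helper dispatches on the backend name; only the "faster-batched" branch is visible in this hunk. If more backends accumulate, a table-driven lookup reads tighter than an if-chain. A purely illustrative sketch (not part of this PR), using only the data shown above:

```python
# Illustrative refactor: keep the backend -> model-choices mapping in one dict.
MODEL_OPTIONS = {
    "faster-batched": [
        "cstr/whisper-large-v3-turbo-int8_float32",
        "deepdml/faster-whisper-large-v3-turbo-ct2",
        "Systran/faster-whisper-large-v3",
        "GalaktischeGurke/primeline-whisper-large-v3-german-ct2",
    ],
    # ...remaining pipeline types as defined in the full file...
}

def get_model_options(pipeline_type):
    # Unknown pipeline types fall back to an empty choice list.
    return MODEL_OPTIONS.get(pipeline_type, [])
```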
```diff
@@ -212,15 +211,13 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
         trimmed_audio_path = trim_audio(audio_path, start_time or 0, end_time)
         audio_path = trimmed_audio_path
 
-
-
-        segments, info =
-        end_time_perf = time.time()
+    start_time_perf = time.time()
+    if pipeline_type in ["faster-batched", "faster-sequenced"]:
+        segments, info = pipeline(audio_path, batch_size=batch_size)
     else:
-
-        result = pipe(audio_path)
+        result = pipeline(audio_path)
         segments = result["chunks"]
-
+    end_time_perf = time.time()
 
     transcription_time = end_time_perf - start_time_perf
     audio_file_size = os.path.getsize(audio_path) / (1024 * 1024)
```
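The branch exists because the two backends return differently shaped results: faster-whisper-style calls yield a `(segments, info)` pair whose segments expose `.start`/`.end`/`.text` attributes, while the transformers ASR pipeline (with `return_timestamps=True`) returns a dict whose `"chunks"` carry `(start, end)` tuples under `"timestamp"`. A minimal, runnable sketch of the two shapes using dummy data (the real objects come from faster-whisper and transformers respectively):

```python
from dataclasses import dataclass

# Stand-in for a faster-whisper Segment (real ones come from
# WhisperModel.transcribe / BatchedInferencePipeline).
@dataclass
class Segment:
    start: float
    end: float
    text: str

segments = [Segment(0.0, 4.2, " Hello world.")]
info = None  # faster-whisper also returns a TranscriptionInfo object

# Stand-in for a transformers ASR pipeline result with return_timestamps=True.
result = {"chunks": [{"timestamp": (0.0, 4.2), "text": " Hello world."}]}

for seg in segments:
    print(f"[{seg.start:.2f}s -> {seg.end:.2f}s] {seg.text}")
for chunk in result["chunks"]:
    start, end = chunk["timestamp"]
    print(f"[{start:.2f}s -> {end:.2f}s] {chunk['text']}")
```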
```diff
@@ -236,12 +233,12 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
     transcription = ""
 
     for segment in segments:
-
-
-
-
+        transcription_segment = (
+            f"[{segment.start:.2f}s -> {segment.end:.2f}s] {segment.text}\n"
+            if pipeline_type in ["faster-batched", "faster-sequenced"] else
+            f"[{segment['timestamp'][0]:.2f}s -> {segment['timestamp'][1]:.2f}s] {segment['text']}\n"
+        )
         transcription += transcription_segment
-
         if verbose:
             yield metrics_output, transcription, None
 
```
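One caveat worth flagging on the transformers side: with chunked long-form decoding, the final chunk's end timestamp can come back as `None`, which would crash the `:.2f` format above. A defensive variant (an assumption about hardening, not something this PR does):

```python
def format_chunk(chunk):
    # Guard against a None end timestamp on the last transformers chunk.
    start, end = chunk["timestamp"]
    end_label = f"{end:.2f}s" if end is not None else "?"
    return f"[{start:.2f}s -> {end_label}] {chunk['text']}\n"

print(format_chunk({"timestamp": (3.0, None), "text": " trailing chunk"}), end="")
```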
```diff
@@ -289,12 +286,11 @@ iface = gr.Interface(
         ["path/to/local/audio.mp3", "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, False]
     ],
     cache_examples=False,
-    live=True
 )
 
-iface.launch()
-
 pipeline_type_dropdown = iface.inputs[1]
 model_dropdown = iface.inputs[2]
 
-pipeline_type_dropdown.change(update_model_dropdown, inputs=[pipeline_type_dropdown], outputs=[model_dropdown])
+pipeline_type_dropdown.change(update_model_dropdown, inputs=[pipeline_type_dropdown], outputs=[model_dropdown])
+
+iface.launch()
```
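Moving `launch()` to the end means the change handler is registered before the app starts serving. `update_model_dropdown` itself is not shown in this diff; a minimal sketch of what such a callback usually looks like, assuming it pairs with `get_model_options` above and Gradio's `gr.update` helper:

```python
import gradio as gr

def update_model_dropdown(pipeline_type):
    # Hypothetical implementation (not shown in this diff): refresh the model
    # dropdown's choices whenever the pipeline-type dropdown changes.
    # Relies on get_model_options defined earlier in app.py.
    options = get_model_options(pipeline_type)
    return gr.update(choices=options, value=options[0] if options else None)
```

Note that indexing `iface.inputs` positionally is fragile: reordering the inputs in `gr.Interface` silently rewires the event to the wrong component, and newer Gradio versions expect this kind of dynamic wiring to live inside a `gr.Blocks` context.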