Update app.py
Browse files
app.py
CHANGED
@@ -284,6 +284,43 @@ with gr.Blocks(css=css) as demo:
|
|
284 |
|
285 |
submit_file.click(function_transcribe, inputs=[audio_file, task_file, cleaning_file, textbox_file], outputs=[dataset_file, transcript_file])
|
286 |
submit_youtube.click(yt_transcribe, inputs=[audio_youtube, task_youtube, cleaning_youtube, textbox_youtube], outputs=[html_youtube, dataset_youtube, transcript_youtube])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
|
289 |
demo.launch(debug=True)
|
|
|
284 |
|
285 |
submit_file.click(function_transcribe, inputs=[audio_file, task_file, cleaning_file, textbox_file], outputs=[dataset_file, transcript_file])
|
286 |
submit_youtube.click(yt_transcribe, inputs=[audio_youtube, task_youtube, cleaning_youtube, textbox_youtube], outputs=[html_youtube, dataset_youtube, transcript_youtube])
|
287 |
+
def function_transcribe(audio_file, task_file, cleaning_file, textbox_file, max_filesize=75.0, dataset_sampling_rate = 24000, progress=gr.Progress()):
|
288 |
+
if isinstance(audio_file, str):
|
289 |
+
audio_file = open(audio_file, "rb")
|
290 |
+
|
291 |
+
_, extension = os.path.splitext(audio_file.name)
|
292 |
+
if extension != '.mp3' and extension != '.wav':
|
293 |
+
raise RuntimeError("Invalid file format. Supported formats are mp3 and wav.")
|
294 |
+
|
295 |
+
if audio_file.size // (1024 * 1024) > FILE_LIMIT_MB:
|
296 |
+
raise RuntimeError(f"File size exceeds the limit ({extension} file {FILE_LIMIT_MB} MB).")
|
297 |
+
|
298 |
+
task = task_file.lower()
|
299 |
+
if task != "transcribe" and task != "translate":
|
300 |
+
raise RuntimeError("Unsupported task. Task must be either 'transcribe' or 'translate'.")
|
301 |
+
|
302 |
+
cleanup = bool(cleaning_file)
|
303 |
+
|
304 |
+
dataset_name = textbox_file.strip().replace("/", "_").replace(" ", "_")
|
305 |
+
|
306 |
+
audio_content = audio_file.read()
|
307 |
+
audio_array, sample_rate = wavfile.imread(BytesIO(audio_content), "wav")
|
308 |
+
|
309 |
+
chunks = naive_postprocess_whisper_chunks(audio_array, sample_rate, stop_chars=".<>?", min_duration=5)
|
310 |
+
|
311 |
+
texts = whisper_batch_transcribe(chunks, model=MODEL_NAME, device=device, task=task)
|
312 |
+
|
313 |
+
if cleanup:
|
314 |
+
cleaned_chunks = clean_audio_chunks(chunks, audio_array, sample_rate)
|
315 |
+
cleaned_texts = whisper_batch_transcribe(cleaned_chunks, model=MODEL_NAME, device=device, task=task)
|
316 |
+
texts = cleaned_texts
|
317 |
|
318 |
+
texts = [t.strip() for t in texts]
|
319 |
+
|
320 |
+
dataset = Dataset.from_dict({"text": texts})
|
321 |
+
if dataset_name:
|
322 |
+
dataset.push_to_hub(dataset_name, repo_type="dataset", private=True)
|
323 |
+
|
324 |
+
return dataset, "\n\n".join(texts)
|
325 |
|
326 |
# Start the Gradio app; debug=True surfaces errors verbosely while running.
demo.launch(debug=True)
|