ovieyra21 committed
Commit 9d0e7a8 · verified · 1 Parent(s): f59d3aa

Update app.py

Files changed (1):
  1. app.py +37 -0
app.py CHANGED
@@ -284,6 +284,43 @@ with gr.Blocks(css=css) as demo:
 
     submit_file.click(function_transcribe, inputs=[audio_file, task_file, cleaning_file, textbox_file], outputs=[dataset_file, transcript_file])
     submit_youtube.click(yt_transcribe, inputs=[audio_youtube, task_youtube, cleaning_youtube, textbox_youtube], outputs=[html_youtube, dataset_youtube, transcript_youtube])
+def function_transcribe(audio_file, task_file, cleaning_file, textbox_file, max_filesize=75.0, dataset_sampling_rate=24000, progress=gr.Progress()):
+    if isinstance(audio_file, str):
+        audio_file = open(audio_file, "rb")
+
+    _, extension = os.path.splitext(audio_file.name)
+    if extension not in (".mp3", ".wav"):
+        raise RuntimeError("Invalid file format. Supported formats are mp3 and wav.")
+
+    if os.path.getsize(audio_file.name) // (1024 * 1024) > FILE_LIMIT_MB:
+        raise RuntimeError(f"File size exceeds the limit ({extension} file, {FILE_LIMIT_MB} MB).")
+
+    task = task_file.lower()
+    if task not in ("transcribe", "translate"):
+        raise RuntimeError("Unsupported task. Task must be either 'transcribe' or 'translate'.")
+
+    cleanup = bool(cleaning_file)
+
+    dataset_name = textbox_file.strip().replace("/", "_").replace(" ", "_")
+
+    audio_content = audio_file.read()
+    sample_rate, audio_array = wavfile.read(BytesIO(audio_content))
+
+    chunks = naive_postprocess_whisper_chunks(audio_array, sample_rate, stop_chars=".<>?", min_duration=5)
+
+    texts = whisper_batch_transcribe(chunks, model=MODEL_NAME, device=device, task=task)
+
+    if cleanup:
+        cleaned_chunks = clean_audio_chunks(chunks, audio_array, sample_rate)
+        cleaned_texts = whisper_batch_transcribe(cleaned_chunks, model=MODEL_NAME, device=device, task=task)
+        texts = cleaned_texts
 
+    texts = [t.strip() for t in texts]
+
+    dataset = Dataset.from_dict({"text": texts})
+    if dataset_name:
+        dataset.push_to_hub(dataset_name, private=True)
+
+    return dataset, "\n\n".join(texts)
 
 demo.launch(debug=True)
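
The hunk above relies on helpers and constants defined elsewhere in app.py and not shown in this diff (FILE_LIMIT_MB, MODEL_NAME, device, naive_postprocess_whisper_chunks, whisper_batch_transcribe, clean_audio_chunks). As a rough illustration of the transcription step only, a minimal whisper_batch_transcribe could be built on the transformers ASR pipeline; the chunk format ({"raw": ..., "sampling_rate": ...}) and the function body below are assumptions for this sketch, not the repository's implementation.

import numpy as np
from transformers import pipeline

def whisper_batch_transcribe(chunks, model, device, task="transcribe"):
    # One ASR pipeline instance, reused for every chunk.
    asr = pipeline("automatic-speech-recognition", model=model, device=device)
    texts = []
    for chunk in chunks:
        # Each chunk is assumed to carry raw samples plus its sampling rate;
        # integer PCM (e.g. int16 from scipy.io.wavfile) should be scaled to [-1, 1] first.
        out = asr(
            {"raw": np.asarray(chunk["raw"], dtype=np.float32), "sampling_rate": chunk["sampling_rate"]},
            generate_kwargs={"task": task},  # "transcribe" or "translate" for Whisper checkpoints
        )
        texts.append(out["text"])
    return texts

Note that dataset.push_to_hub(dataset_name, private=True) only succeeds if the Space (or local session) is authenticated against the Hub, e.g. via huggingface-cli login or an HF_TOKEN secret.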