Update app.py
Browse files
app.py
CHANGED
@@ -284,6 +284,43 @@ with gr.Blocks(css=css) as demo:
|
|
284 |
|
285 |
submit_file.click(function_transcribe, inputs=[audio_file, task_file, cleaning_file, textbox_file], outputs=[dataset_file, transcript_file])
|
286 |
submit_youtube.click(yt_transcribe, inputs=[audio_youtube, task_youtube, cleaning_youtube, textbox_youtube], outputs=[html_youtube, dataset_youtube, transcript_youtube])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
287 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
|
289 |
demo.launch(debug=True)
|
|
|
284 |
|
285 |
submit_file.click(function_transcribe, inputs=[audio_file, task_file, cleaning_file, textbox_file], outputs=[dataset_file, transcript_file])
|
286 |
submit_youtube.click(yt_transcribe, inputs=[audio_youtube, task_youtube, cleaning_youtube, textbox_youtube], outputs=[html_youtube, dataset_youtube, transcript_youtube])
|
287 |
+
def function_transcribe(audio_file, task_file, cleaning_file, textbox_file, max_filesize=75.0, dataset_sampling_rate = 24000, progress=gr.Progress()):
|
288 |
+
if isinstance(audio_file, str):
|
289 |
+
audio_file = open(audio_file, "rb")
|
290 |
+
|
291 |
+
_, extension = os.path.splitext(audio_file.name)
|
292 |
+
if extension != '.mp3' and extension != '.wav':
|
293 |
+
raise RuntimeError("Invalid file format. Supported formats are mp3 and wav.")
|
294 |
+
|
295 |
+
if audio_file.size // (1024 * 1024) > FILE_LIMIT_MB:
|
296 |
+
raise RuntimeError(f"File size exceeds the limit ({extension} file {FILE_LIMIT_MB} MB).")
|
297 |
+
|
298 |
+
task = task_file.lower()
|
299 |
+
if task != "transcribe" and task != "translate":
|
300 |
+
raise RuntimeError("Unsupported task. Task must be either 'transcribe' or 'translate'.")
|
301 |
+
|
302 |
+
cleanup = bool(cleaning_file)
|
303 |
+
|
304 |
+
dataset_name = textbox_file.strip().replace("/", "_").replace(" ", "_")
|
305 |
+
|
306 |
+
audio_content = audio_file.read()
|
307 |
+
audio_array, sample_rate = wavfile.imread(BytesIO(audio_content), "wav")
|
308 |
+
|
309 |
+
chunks = naive_postprocess_whisper_chunks(audio_array, sample_rate, stop_chars=".<>?", min_duration=5)
|
310 |
+
|
311 |
+
texts = whisper_batch_transcribe(chunks, model=MODEL_NAME, device=device, task=task)
|
312 |
+
|
313 |
+
if cleanup:
|
314 |
+
cleaned_chunks = clean_audio_chunks(chunks, audio_array, sample_rate)
|
315 |
+
cleaned_texts = whisper_batch_transcribe(cleaned_chunks, model=MODEL_NAME, device=device, task=task)
|
316 |
+
texts = cleaned_texts
|
317 |
|
318 |
+
texts = [t.strip() for t in texts]
|
319 |
+
|
320 |
+
dataset = Dataset.from_dict({"text": texts})
|
321 |
+
if dataset_name:
|
322 |
+
dataset.push_to_hub(dataset_name, repo_type="dataset", private=True)
|
323 |
+
|
324 |
+
return dataset, "\n\n".join(texts)
|
325 |
|
326 |
# Start the Gradio app; debug=True surfaces errors verbosely while running.
demo.launch(debug=True)
|