Spaces:

avfranco
/

audioqna

Build error

App Files Files Community

avfranco committed on Sep 16, 2024

Commit

03ddc3f

verified ·

1 Parent(s): c939bfd

Update app.py

Browse files

Updated app.py based on https://huggingface.co/spaces/hf-audio/whisper-large-v3.

Files changed (1) hide show

app.py +24 -59

app.py CHANGED Viewed

@@ -1,75 +1,40 @@
 import gradio as gr
-import os
-from pydub import AudioSegment
-def audio_converter(audio_file:str):
-    audio_input = AudioSegment.from_file(audio_file,'m4a')
-    audio_input_name = os.path.splitext(audio_file)[0]
-    audio_wav_filename = f"{audio_input_name}.wav"
-    audio_input.export(audio_wav_filename, 'wav')
-    return audio_wav_filename
-def asr_transcriber(audio_file):
-    from transformers import pipeline
-    import torch
-    audio_file_wav = audio_converter(audio_file)
-    # Check for CUDA availability (GPU)
-    if torch.cuda.is_available():
-        device_id = torch.device('cuda')
-    else:
-        device_id = torch.device('cpu')
-    torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
-    #Mac runtime
-    #device_id = "mps"
-    #torch_dtype = torch.float16
-    flash = False
-    ts = False
-    #Try to optimize when CPU and float32
-    model_id = "openai/whisper-tiny"
-    # Initialize the ASR pipeline
-    pipe = pipeline(
-        "automatic-speech-recognition",
-        model=model_id,
-        torch_dtype=torch_dtype,
-        device=device_id
-    )
-    if device_id == "mps":
-        torch.mps.empty_cache()
-    elif not flash:
-        pipe.model = pipe.model.to_bettertransformer()
-    language = None
-    task = "transcribe"
-    json_output = pipe(
-        audio_file_wav,
-        chunk_length_s=30,
-        batch_size=8,
-        generate_kwargs={"task": task, "language": language},
-        return_timestamps=ts
-    )
-    return json_output["text"]
 with gr.Blocks() as transcriberUI:
     gr.Markdown(
     """
-    # Ola Xara & Solange!
     Clicar no botao abaixo para selecionar o Audio a ser transcrito!
-    Ambiente Demo disponivel 24x7. Running on CPU Upgrade with openai/whisper-tiny
     """)
-    inp = gr.File(label="Arquivo de Audio", show_label=True, file_count="single", file_types=["m4a"])
     transcribe = gr.Textbox(label="Transcricao", show_label=True, show_copy_button=True)
-    inp.upload(asr_transcriber, inp, transcribe)
 if __name__ == "__main__":
     transcriberUI.launch()

+import spaces
+import torch
 import gradio as gr
+from transformers import pipeline
+from transformers.pipelines.audio_utils import ffmpeg_read
+MODEL_NAME = "openai/whisper-large-v3"
+BATCH_SIZE = 8
+FILE_LIMIT_MB = 1000
+device = 0 if torch.cuda.is_available() else "cpu"
+pipe = pipeline(
+    task="automatic-speech-recognition",
+    model=MODEL_NAME,
+    chunk_length_s=30,
+    device=device,
+)
+@spaces.GPU
+def audio_transcribe(inputs, task):
+    if inputs is None:
+        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
+    return  text
 with gr.Blocks() as transcriberUI:
     gr.Markdown(
     """
+    # Ola!
     Clicar no botao abaixo para selecionar o Audio a ser transcrito!
+    Ambiente Demo disponivel 24x7. Running on ZeroGPU with openai/whisper-large-v3
     """)
+    inp = gr.File(label="Arquivo de Audio", show_label=True, type="file_path", file_count="single", file_types=["mp3"])
     transcribe = gr.Textbox(label="Transcricao", show_label=True, show_copy_button=True)
+    inp.upload(audio_transcribe, inp, transcribe)
 if __name__ == "__main__":
     transcriberUI.launch()