"""Gradio demo that transcribes uploaded M4A audio files with OpenAI Whisper."""

import contextlib
import functools
import os

import gradio as gr
from pydub import AudioSegment

# Whisper checkpoint used for transcription. The UI banner is built from this
# constant so the advertised model can never drift from the one actually loaded.
MODEL_ID = "openai/whisper-small"


def audio_converter(audio_file: str) -> str:
    """Convert an M4A audio file to WAV.

    The WAV file is written next to the input file, using the same base name
    with a ``.wav`` extension.

    Args:
        audio_file: Path of the M4A file to convert.

    Returns:
        Path of the WAV file that was written.
    """
    audio_input = AudioSegment.from_file(audio_file, format="m4a")
    audio_input_name = os.path.splitext(audio_file)[0]
    audio_wav_filename = f"{audio_input_name}.wav"
    audio_input.export(audio_wav_filename, format="wav")
    return audio_wav_filename


@functools.lru_cache(maxsize=1)
def _load_asr_pipeline():
    """Build the Whisper speech-recognition pipeline once and reuse it.

    Loading the checkpoint is by far the most expensive step, so the pipeline
    is cached instead of being rebuilt for every uploaded file.
    """
    # Imported lazily so the heavy ML stack is only loaded on first use and
    # the UI itself can start quickly.
    import torch
    from transformers import pipeline

    # Prefer the GPU (with half precision) when CUDA is available.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    torch_dtype = torch.float16 if use_cuda else torch.float32

    # Mac runtime: switch to torch.device("mps") with torch.float16 here.

    flash = False

    pipe = pipeline(
        "automatic-speech-recognition",
        model=MODEL_ID,
        torch_dtype=torch_dtype,
        device=device,
    )

    # Compare the device *type*: a torch.device never equals the bare
    # string "mps", so a direct comparison would silently skip this branch.
    if device.type == "mps":
        torch.mps.empty_cache()
    elif not flash:
        # Try to optimize when running on CPU with float32.
        pipe.model = pipe.model.to_bettertransformer()

    return pipe


def asr_transcriber(audio_file):
    """Transcribe an uploaded audio file and return the recognized text.

    The upload is first converted to WAV, then run through the cached Whisper
    pipeline in 30-second chunks. The intermediate WAV file is deleted
    afterwards so repeated uploads do not fill the disk.

    Args:
        audio_file: Path of the uploaded M4A file.

    Returns:
        The transcription text produced by the pipeline.
    """
    audio_file_wav = audio_converter(audio_file)
    try:
        json_output = _load_asr_pipeline()(
            audio_file_wav,
            chunk_length_s=30,
            batch_size=8,
            # language=None leaves the language choice to the model.
            generate_kwargs={"task": "transcribe", "language": None},
            return_timestamps=False,
        )
    finally:
        # Never delete the caller's own file if the converted path happens
        # to coincide with the input path.
        if audio_file_wav != audio_file:
            with contextlib.suppress(FileNotFoundError):
                os.remove(audio_file_wav)
    return json_output["text"]


with gr.Blocks() as transcriberUI:
    gr.Markdown(
        f"""
        # Ola Xara & Solange!
        Clicar no botao abaixo para selecionar o Audio a ser transcrito!
        Ambiente Demo disponivel 24x7. Running on CPU Upgrade with {MODEL_ID}
        """
    )
    # Gradio expects file extensions in ``file_types`` to carry a leading dot.
    inp = gr.File(
        label="Arquivo de Audio",
        show_label=True,
        file_count="single",
        file_types=[".m4a"],
    )
    transcribe = gr.Textbox(
        label="Transcricao",
        show_label=True,
        show_copy_button=True,
    )
    # Start transcription as soon as a file has been uploaded.
    inp.upload(asr_transcriber, inp, transcribe)


if __name__ == "__main__":
    transcriberUI.launch()