# Spaces: Running on Zero
import gradio as gr | |
import os | |
from pydub import AudioSegment | |
def audio_converter(audio_file:str):
    """Convert an M4A audio file to WAV and return the path of the WAV file.

    The WAV file is written next to the input, using the same path with the
    extension replaced by ``.wav``.

    Args:
        audio_file: Path of the audio file to convert; it is decoded as M4A.

    Returns:
        The path of the exported WAV file.
    """
    # Decode first so that an unreadable input fails before any output path is built.
    segment = AudioSegment.from_file(audio_file, format='m4a')
    base_path, _extension = os.path.splitext(audio_file)
    wav_path = f"{base_path}.wav"
    segment.export(wav_path, format='wav')
    return wav_path
# ASR pipelines already built in this process, keyed by (model id, device type,
# flash flag), so the model weights are loaded once instead of on every upload.
_ASR_PIPELINE_CACHE = {}


def _get_asr_pipeline(model_id, use_flash=False):
    """Return the speech-recognition pipeline for *model_id*, building it on first use.

    Args:
        model_id: Hugging Face model identifier handed to ``transformers.pipeline``.
        use_flash: When False (the default) the model is converted with
            ``to_bettertransformer()`` after loading.

    Returns:
        A ready-to-call ``automatic-speech-recognition`` pipeline.
    """
    # Heavy dependencies are imported lazily, as in the original code, so that
    # importing this module (and building the UI) stays cheap.
    from transformers import pipeline
    import torch
    import optimum  # noqa: F401  # presumably required by to_bettertransformer(); fail early if missing

    # Use the GPU with half precision when CUDA is available, otherwise CPU/float32.
    # NOTE: on Apple Silicon, "mps" with float16 could be selected here instead.
    if torch.cuda.is_available():
        device = torch.device('cuda')
        torch_dtype = torch.float16
    else:
        device = torch.device('cpu')
        torch_dtype = torch.float32

    cache_key = (model_id, device.type, use_flash)
    cached = _ASR_PIPELINE_CACHE.get(cache_key)
    if cached is not None:
        return cached

    pipe = pipeline(
        "automatic-speech-recognition",
        model=model_id,
        torch_dtype=torch_dtype,
        device=device,
    )

    # Compare the device *type*: a torch.device never equals the plain string
    # "mps", so the original `device_id == "mps"` test could never be true.
    if device.type == "mps":
        torch.mps.empty_cache()
    elif not use_flash:
        pipe.model = pipe.model.to_bettertransformer()

    _ASR_PIPELINE_CACHE[cache_key] = pipe
    return pipe


def asr_transcriber(audio_file):
    """Transcribe an uploaded M4A audio file and return the recognised text.

    The file is first converted to WAV with ``audio_converter``, then run
    through the ``openai/whisper-small`` pipeline in 30-second chunks.

    Args:
        audio_file: Path of the uploaded audio file (expected to be M4A).

    Returns:
        The ``"text"`` field of the pipeline output.
    """
    model_id = "openai/whisper-small"
    flash = False
    ts = True  # ask the pipeline for timestamps, as the original call did
    language = None  # no language forced
    task = "transcribe"

    audio_file_wav = audio_converter(audio_file)
    try:
        pipe = _get_asr_pipeline(model_id, use_flash=flash)
        json_output = pipe(
            audio_file_wav,
            chunk_length_s=30,
            batch_size=2,
            generate_kwargs={"task": task, "language": language},
            return_timestamps=ts,
        )
    finally:
        # The WAV file is only an intermediate artefact; remove it so repeated
        # uploads do not pile up on disk. Never delete the caller's own file.
        if audio_file_wav != audio_file:
            try:
                os.remove(audio_file_wav)
            except OSError:
                # Best-effort cleanup: a failed delete must not mask the result
                # (or the real error) of the transcription itself.
                pass
    return json_output["text"]
# Introductory Markdown shown at the top of the page (user-facing text, kept verbatim).
_INTRO_MARKDOWN = """
# Ola Xara & Solange!
Clicar no botao abaixo para selecionar o Audio a ser transcrito!
Ambiente de Teste: pode demorar um pouco. Nao fiquem nervosos :-)
"""

# Build the page: an intro text, a single-file upload and a read-only result box.
with gr.Blocks() as transcriberUI:
    gr.Markdown(_INTRO_MARKDOWN)
    # Gradio's documented form for an extension filter carries a leading dot
    # (".m4a"); a bare "m4a" is interpreted as a file category instead.
    inp = gr.File(
        label="Arquivo de Audio",
        show_label=True,
        file_count="single",
        file_types=[".m4a"],
    )
    transcribe = gr.Textbox(label="Transcricao", show_label=True, show_copy_button=True)
    # Run the transcription on upload and show the text in the box.
    inp.upload(asr_transcriber, inp, transcribe)

if __name__ == "__main__":
    transcriberUI.launch()