Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,077 Bytes
3597c88 0f4639b 3597c88 87f602f 3733074 87f602f 38911b7 87f602f 0f4639b 87f602f a910371 cdf5a0d 87f602f 3597c88 87f602f 72cc5e7 3597c88 87f602f 3597c88 41b2864 3597c88 ae1cf08 3597c88 5777262 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import functools
import os

import gradio as gr
from pydub import AudioSegment
def audio_converter(audio_file: str, *, source_format: str = "m4a") -> str:
    """Convert an audio file to WAV and return the path of the new file.

    The WAV file is written next to the input, using the input's path with
    its extension replaced by ``.wav``.

    Args:
        audio_file: Path of the audio file to convert.
        source_format: Container format passed to pydub when decoding the
            input. Defaults to ``"m4a"``, the only format the UI accepts.

    Returns:
        Path of the exported ``.wav`` file.
    """
    audio_input = AudioSegment.from_file(audio_file, source_format)
    base_name, _extension = os.path.splitext(audio_file)
    audio_wav_filename = f"{base_name}.wav"
    audio_input.export(audio_wav_filename, "wav")
    return audio_wav_filename
# Checkpoint used for transcription.
_ASR_MODEL_ID = "openai/whisper-small"


@functools.lru_cache(maxsize=1)
def _load_asr_pipeline(model_id):
    """Build the speech-recognition pipeline for *model_id* once and cache it.

    Loading the model is by far the most expensive step, so it is done on the
    first request only instead of on every upload.
    """
    # Heavy imports stay deferred until the first transcription request.
    from transformers import pipeline
    import torch

    # Use the GPU with half precision when CUDA is available, otherwise CPU
    # with full precision.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    torch_dtype = torch.float16 if use_cuda else torch.float32
    # NOTE: for a Mac runtime, use torch.device("mps") with torch.float16.

    # Flash attention is not enabled; BetterTransformer is used instead.
    use_flash_attention = False

    pipe = pipeline(
        "automatic-speech-recognition",
        model=model_id,
        torch_dtype=torch_dtype,
        device=device,
    )

    # Compare the device *type*: a torch.device never equals the plain
    # string "mps", so comparing the object itself would never match.
    if device.type == "mps":
        torch.mps.empty_cache()
    elif not use_flash_attention:
        pipe.model = pipe.model.to_bettertransformer()
    return pipe


def asr_transcriber(audio_file):
    """Transcribe an uploaded audio file and return the recognized text.

    The upload is first converted to WAV with ``audio_converter`` and then
    run through the cached speech-recognition pipeline in 30-second chunks.
    The intermediate WAV file is deleted afterwards.

    Args:
        audio_file: Path of the uploaded audio file.

    Returns:
        The ``"text"`` field of the pipeline output.
    """
    audio_file_wav = audio_converter(audio_file)
    try:
        pipe = _load_asr_pipeline(_ASR_MODEL_ID)
        json_output = pipe(
            audio_file_wav,
            chunk_length_s=30,
            batch_size=8,
            # language is left as None -- presumably so the model detects
            # the spoken language itself; confirm against the model docs.
            generate_kwargs={"task": "transcribe", "language": None},
            return_timestamps=False,
        )
    finally:
        # The WAV copy is only an intermediate artifact; remove it so
        # repeated uploads do not fill the disk. Never delete the upload
        # itself if conversion happened to produce the same path.
        if audio_file_wav != audio_file:
            try:
                os.remove(audio_file_wav)
            except OSError:
                # Best-effort cleanup: a failed delete must not mask the
                # transcription result or the original error.
                pass
    return json_output["text"]
# Intro banner shown at the top of the page (user-facing, Portuguese).
# The model name must match the checkpoint the transcriber actually loads.
_INTRO_MARKDOWN = """
# Ola Xara & Solange!
Clicar no botao abaixo para selecionar o Audio a ser transcrito!
Ambiente Demo disponivel 24x7. Running on CPU Upgrade with openai/whisper-small
"""

# UI layout: one file upload that feeds the transcriber, and one textbox
# that shows the resulting text.
with gr.Blocks() as transcriberUI:
    gr.Markdown(_INTRO_MARKDOWN)
    inp = gr.File(
        label="Arquivo de Audio",
        show_label=True,
        file_count="single",
        # Extensions must carry the leading dot; a bare "m4a" is neither an
        # extension nor one of Gradio's file-type categories.
        file_types=[".m4a"],
    )
    transcribe = gr.Textbox(
        label="Transcricao",
        show_label=True,
        show_copy_button=True,
    )
    # Start transcription as soon as a file finishes uploading.
    inp.upload(asr_transcriber, inp, transcribe)

if __name__ == "__main__":
    transcriberUI.launch()
|