audioqna / transcriberUI.py
avfranco's picture
audio transcriber
3597c88
raw
history blame
1.71 kB
import gradio as gr
import time
import os
from pydub import AudioSegment
def audio_converter(audio_file: str, audio_format: str = None):
    """Convert an audio file to WAV, written alongside the original.

    Args:
        audio_file: Path to the input audio file.
        audio_format: Explicit pydub/ffmpeg container format. When omitted,
            it is inferred from the file extension, falling back to the
            historical hard-coded 'm4a' for extension-less paths — so
            existing callers uploading .m4a files behave exactly as before.

    Returns:
        The path of the newly written .wav file.
    """
    base_name, ext = os.path.splitext(audio_file)
    if audio_format is None:
        # Infer the format from the extension instead of assuming m4a.
        audio_format = ext.lstrip('.').lower() or 'm4a'
    audio_input = AudioSegment.from_file(audio_file, audio_format)
    audio_wav_filename = f"{base_name}.wav"
    audio_input.export(audio_wav_filename, 'wav')
    return audio_wav_filename
def asr_transcriber(audio_file):
    """Transcribe an uploaded audio file with Whisper large-v3.

    The file is first converted to WAV (see ``audio_converter``), then run
    through the Hugging Face ASR pipeline in 30-second chunks.

    Args:
        audio_file: Path to the uploaded audio file.

    Returns:
        The transcription text produced by the pipeline.
    """
    from transformers import pipeline
    import torch

    audio_file_wav = audio_converter(audio_file)

    # Pick the best available device instead of hard-coding Apple Silicon,
    # so the app also runs on CUDA or plain CPU hosts. fp16 is only safe on
    # an accelerator; fall back to fp32 on CPU.
    if torch.backends.mps.is_available():
        device_id = "mps"
    elif torch.cuda.is_available():
        device_id = "cuda:0"
    else:
        device_id = "cpu"
    torch_dtype = torch.float32 if device_id == "cpu" else torch.float16

    # Initialize the ASR pipeline (re-created per call; model weights are
    # cached on disk by transformers after the first download).
    pipe = pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-large-v3",
        torch_dtype=torch_dtype,
        device=device_id,
    )
    if device_id == "mps":
        # Release any cached MPS memory before a long inference run.
        torch.mps.empty_cache()

    json_output = pipe(
        audio_file_wav,
        chunk_length_s=30,  # stream long audio in 30 s windows
        batch_size=2,
        generate_kwargs={"task": "transcribe", "language": None},
        return_timestamps=True,
    )
    return json_output["text"]
# --- Gradio front-end ---
# Single-page UI: the user uploads one .m4a file; the ASR pipeline's
# transcription appears in a copyable text box.
with gr.Blocks() as transcriberUI:
    gr.Markdown(
        """
# Ola Xara & Solange!
Clicar no botao abaixo para selecionar o Audio a ser transcrito!
Ambiente de Teste: pode demorar um pouco. Nao fiquem nervosos :-)
""")
    audio_upload = gr.File(
        label="Arquivo de Audio",
        show_label=True,
        file_count="single",
        file_types=["m4a"],
    )
    transcript_box = gr.Textbox(
        label="Transcricao",
        show_label=True,
        show_copy_button=True,
    )
    # Kick off transcription whenever a new file lands in the upload widget.
    audio_upload.upload(asr_transcriber, audio_upload, transcript_box)

transcriberUI.launch(share=True)