File size: 1,709 Bytes
3597c88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import gradio as gr
import time
import os
from pydub import AudioSegment

def audio_converter(audio_file: str) -> str:
    """Convert an audio file to WAV and return the path of the WAV file.

    The WAV file is written next to the input, using the same base name with
    a ``.wav`` extension.

    Args:
        audio_file: Path of the audio file to convert.

    Returns:
        Path of the WAV file. When the input already has a ``.wav`` extension
        it is returned unchanged and nothing is written.
    """
    source_path = os.fspath(audio_file)
    base_name, extension = os.path.splitext(source_path)
    extension = extension.lower()

    # Nothing to convert; re-exporting would overwrite the input with itself.
    if extension == ".wav":
        return source_path

    # Keep the explicit "m4a" hint for the expected uploads (and for paths
    # without an extension); for any other extension pass no format so the
    # decoder can detect the container on its own.
    source_format = "m4a" if extension in ("", ".m4a") else None

    wav_path = f"{base_name}.wav"
    AudioSegment.from_file(source_path, source_format).export(wav_path, "wav")
    return wav_path

_ASR_MODEL_ID = "openai/whisper-large-v3"
_asr_pipeline_cache = None  # populated lazily by _get_asr_pipeline()


def _select_device_and_dtype():
    """Pick the inference device and a matching dtype for the ASR model."""
    import torch

    if torch.backends.mps.is_available():
        return "mps", torch.float16
    if torch.cuda.is_available():
        return "cuda:0", torch.float16
    # NOTE(review): float32 is used on CPU because half-precision kernels are
    # not reliably available there - confirm on the target host.
    return "cpu", torch.float32


def _get_asr_pipeline():
    """Return the shared Whisper pipeline, building it on first use."""
    global _asr_pipeline_cache
    if _asr_pipeline_cache is None:
        # Imported lazily so that starting the UI does not pay for it.
        from transformers import pipeline

        device, dtype = _select_device_and_dtype()
        _asr_pipeline_cache = pipeline(
            "automatic-speech-recognition",
            model=_ASR_MODEL_ID,
            torch_dtype=dtype,
            device=device,
        )
    return _asr_pipeline_cache


def asr_transcriber(audio_file):
    """Transcribe an uploaded audio file and return the recognised text.

    The file is first converted to WAV with ``audio_converter`` and then run
    through the Whisper pipeline in 30-second chunks. The spoken language is
    not specified (``language=None``) and the task is ``"transcribe"``.

    Args:
        audio_file: Path of the audio file to transcribe.

    Returns:
        The ``"text"`` entry of the pipeline output.
    """
    import torch

    audio_file_wav = audio_converter(audio_file)
    try:
        # Reuse one pipeline across uploads instead of reloading the model
        # on every call.
        pipe = _get_asr_pipeline()
        if torch.backends.mps.is_available():
            torch.mps.empty_cache()

        json_output = pipe(
            audio_file_wav,
            chunk_length_s=30,
            batch_size=2,
            generate_kwargs={"task": "transcribe", "language": None},
            return_timestamps=True,
        )
    finally:
        # The WAV copy is only an intermediate artefact; never delete the
        # caller's own file if the converter handed it back unchanged.
        if audio_file_wav != audio_file:
            try:
                os.remove(audio_file_wav)
            except OSError:
                pass  # best-effort cleanup; a leftover file is harmless

    return json_output["text"]

# Single-page UI: an upload widget feeds asr_transcriber and the returned text
# is shown in a copyable textbox.
with gr.Blocks() as transcriberUI:
    gr.Markdown(
    """
    # Ola Xara & Solange!
    Clicar no botao abaixo para selecionar o Audio a ser transcrito!
    Ambiente de Teste: pode demorar um pouco. Nao fiquem nervosos :-)
    """)
    # File extensions in file_types need the leading dot (".m4a"); without it
    # the entry is treated as a type category rather than an extension.
    inp = gr.File(label="Arquivo de Audio", show_label=True, file_count="single", file_types=[".m4a"])
    transcribe = gr.Textbox(label="Transcricao", show_label=True, show_copy_button=True)
    # Run the transcription as soon as a file has been uploaded.
    inp.upload(asr_transcriber, inp, transcribe)

# share=True additionally requests a public share link for the app.
transcriberUI.launch(share=True)