import os
import spaces
import torch
import gradio as gr
import openai
from transformers import pipeline

MODEL_NAME = "openai/whisper-large-v3"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000  # upload size cap in MB (declared but not enforced below)

# Use the first CUDA GPU when available; transformers pipelines accept a device index or "cpu".
device = 0 if torch.cuda.is_available() else "cpu"

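# Load the Whisper ASR pipeline once at startup. chunk_length_s=30 splits
# long recordings into 30-second windows, so audio longer than Whisper's
# native context can be transcribed in a single call.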
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

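# UI layout: upload an audio file, display its transcription, then
# optionally ask questions about the transcript via the OpenAI API.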
with gr.Blocks() as transcriberUI:
    gr.Markdown(
        """
        # Hello!
        Click the button below to select the audio file to be transcribed.
        Demo environment available 24x7. Running on ZeroGPU with openai/whisper-large-v3.
        """
    )
    inp = gr.File(label="Audio File", show_label=True, type="filepath", file_count="single", file_types=[".mp3"])
    transcribe = gr.Textbox(label="Transcription", show_label=True, show_copy_button=True)
    ask_question = gr.Textbox(label="Ask a question", visible=True)
    response_output = gr.Textbox(label="Response", visible=True)
    submit_question = gr.Button("Submit question", visible=True)

    # No @spaces.GPU here: this calls the remote OpenAI API and does not need the GPU.
    def respond_to_question(transcript, question):
        # Answer the user's question using the transcript as context.
        # gpt-4o-mini is a chat model, so it must go through the Chat
        # Completions API; the legacy Completion endpoint does not support it.
        openai.api_key = os.environ["OPENAI_API_KEY"]
        completion = openai.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": f"Answer based on this transcript:\n{transcript}"},
                {"role": "user", "content": question},
            ],
            temperature=0.3,
            max_tokens=60,
        )
        return completion.choices[0].message.content
        
    @spaces.GPU
    def audio_transcribe(inputs):
        if inputs is None:
            raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

        # return_timestamps=True is required by Whisper for audio longer than
        # 30 seconds; only the plain text is returned to the UI.
        text = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)["text"]
        return text

    def ask_question_callback(transcription, question):
        # Check the question text itself; the original check tested the
        # ask_question component object, which is always truthy. In Blocks,
        # outputs are updated by returning values, not by assigning .value.
        if question and question.strip():
            return respond_to_question(transcription, question)
        return "No question asked"

    inp.upload(audio_transcribe, inputs=inp, outputs=transcribe)
    submit_question.click(ask_question_callback, outputs=[response_output], inputs=[transcribe, ask_question])


transcriberUI.queue().launch()
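
# Note: OPENAI_API_KEY must be set in the environment (e.g. as a Space secret)
# for the question-answering feature to work.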