Spaces:
Running
on
Zero
Running
on
Zero
File size: 2,632 Bytes
5ad5566 03ddc3f 3597c88 03ddc3f 3597c88 03ddc3f 3597c88 03ddc3f 3597c88 03ddc3f 87f602f 3e9dc66 a84f44c 5f49ba8 3e9dc66 10fea9b 5aa403b 787c0bf 3e9dc66 5195d28 0c9b4d5 be72dc1 5195d28 3597c88 0d16ed8 5195d28 0d16ed8 908a6e1 3597c88 0c9b4d5 763daee 0d16ed8 5f013ee 1a99cb6 3e9dc66 0d16ed8 ac8ebf8 0d16ed8 ac8ebf8 29000fa e5c7656 5ad5566 0c9b4d5 5ad5566 3597c88 763daee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 |
import os
import spaces
import torch
import gradio as gr
from transformers import pipeline
# Whisper checkpoint and inference settings used by this app.
MODEL_NAME = "openai/whisper-large-v3"
BATCH_SIZE = 8
# NOTE(review): FILE_LIMIT_MB is not referenced by any code visible in this file.
FILE_LIMIT_MB = 1000

# Run on CUDA device 0 when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# Speech-recognition pipeline, built once at import time and called by
# audio_transcribe for every upload.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
def respond_to_question_llama(transcript, question):
    """Ask the hosted Llama 3.1 70B Instruct model a question about a transcript.

    Args:
        transcript: Transcribed text that is sent to the model as context.
        question: The user's question about that transcript.

    Returns:
        The message content of the first choice of the chat completion.

    Raises:
        KeyError: If the HUGGINGFACEHUB_API_TOKEN environment variable is not set.
    """
    from huggingface_hub import InferenceClient

    api_token = os.environ["HUGGINGFACEHUB_API_TOKEN"]
    llama_client = InferenceClient(
        "meta-llama/Meta-Llama-3.1-70B-Instruct",
        token=api_token,
    )

    # Transcript and question travel together in a single user message.
    prompt = f"Transcript: {transcript}\n\nUser: {question}"
    completion = llama_client.chat_completion(
        messages=[{"role": "user", "content": prompt}],
        max_tokens=4096,
    )
    return completion.choices[0].message.content
@spaces.GPU
def audio_transcribe(inputs):
    """Transcribe the uploaded audio and reveal the question/answer widgets.

    Args:
        inputs: Value of the audio file component supplied by the upload
            event, or None when nothing was submitted.

    Returns:
        A four-item list in the order of the upload event's outputs: the
        transcribed text, then visibility updates for the question textbox,
        the submit button and the response textbox.

    Raises:
        gr.Error: If ``inputs`` is None.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    text = pipe(inputs, batch_size=BATCH_SIZE, return_timestamps=True)["text"]

    # Each update uses the same component class as the output it targets;
    # the submit control is a Button, so it must not be updated as a Textbox.
    return [
        text,
        gr.Textbox(visible=True),  # ask_question
        gr.Button(visible=True),  # submit_question
        gr.Textbox(visible=True),  # response_output
    ]
def hidden_ask_question():
    """Hide the question/answer widgets again.

    Returns:
        A three-item list of visibility updates in the order of the clear
        button's outputs: question textbox, response textbox, submit button.
    """
    # Each update uses the same component class as the output it targets;
    # the submit control is a Button, so it must not be updated as a Textbox.
    return [
        gr.Textbox(visible=False),  # ask_question
        gr.Textbox(visible=False),  # response_output
        gr.Button(visible=False),  # submit_question
    ]
with gr.Blocks() as transcriberUI:
    # Intro banner; the user-facing text is intentionally in Portuguese.
    gr.Markdown(
        """
        # Ola!
        Clique no botao abaixo para selecionar o Audio que deseja conversar!
        Ambiente disponivel 24x7. Running on ZeroGPU with openai/whisper-large-v3
        """
    )

    # Audio upload. Extensions in file_types are written with a leading dot,
    # the form Gradio documents for extension filters.
    inp = gr.File(
        label="Arquivo de Audio",
        show_label=True,
        type="filepath",
        file_count="single",
        file_types=[".mp3", ".m4a"],
    )
    transcribe = gr.Textbox(label="Transcricao", show_label=True, show_copy_button=True)

    # The question/answer widgets start hidden; audio_transcribe reveals them
    # and hidden_ask_question hides them again.
    ask_question = gr.Textbox(label="Ask a question", visible=False)
    response_output = gr.Textbox(label="Response", visible=False)
    submit_question = gr.Button("Submit question", visible=False)
    clear_button = gr.ClearButton([transcribe, response_output, inp, ask_question])

    def ask_question_callback(transcription, question):
        """Answer a question about the current transcription.

        Args:
            transcription: Current content of the transcription textbox.
            question: Text typed into the question textbox.

        Returns:
            The model's answer, or "No question asked" when the question is
            empty or only whitespace.
        """
        # Test the submitted text, not the `ask_question` component object:
        # testing the component made the fallback branch unreachable.
        if not question or not question.strip():
            return "No question asked"
        return respond_to_question_llama(transcription, question)

    # Event wiring: upload -> transcribe, button -> answer, clear -> hide.
    inp.upload(
        audio_transcribe,
        inputs=inp,
        outputs=[transcribe, ask_question, submit_question, response_output],
    )
    submit_question.click(
        ask_question_callback,
        inputs=[transcribe, ask_question],
        outputs=[response_output],
    )
    clear_button.click(
        hidden_ask_question,
        outputs=[ask_question, response_output, submit_question],
    )
# Enable request queuing and start the app.
transcriberUI.queue().launch()