Spaces: Running on Zero
File size: 3,271 Bytes
import os
import spaces
import torch
import gradio as gr
from transformers import pipeline
MODEL_NAME = "openai/whisper-large-v3"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000
device = 0 if torch.cuda.is_available() else "cpu"
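
# ASR pipeline with 30-second chunking so audio longer than Whisper's
# native context window can be transcribed; device=0 selects the first GPU.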
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

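# Forward the transcript plus the user's question to a hosted Llama model
# through the Hugging Face serverless Inference API.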
def respond_to_question_llama(transcript, question):
    from huggingface_hub import InferenceClient

    client = InferenceClient(
        "meta-llama/Meta-Llama-3.1-70B-Instruct",
        token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
    )
    response = client.chat_completion(
        messages=[{"role": "user", "content": f"Transcript: {transcript}\n\nUser: {question}"}],
        max_tokens=4096,
    ).choices[0].message.content
    return response
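
# ZeroGPU: @spaces.GPU attaches a GPU only for the duration of each call.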
@spaces.GPU
def audio_transcribe(inputs):
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": "transcribe"}, return_timestamps=True)['text']
    # Reveal the question box, submit button, and response box now that a transcript exists.
    return [text, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)]
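
# Hide the question/response widgets again when the Clear button is pressed.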
def hidden_ask_question():
    return [gr.update(visible=False), gr.update(visible=False), gr.update(visible=False)]

with gr.Blocks() as transcriberUI:
    gr.Markdown(
        """
        # Hello!
        Click the button below to select the audio file you want to chat with!
        Available 24x7. Running on ZeroGPU with openai/whisper-large-v3
        """
    )
    # Components are created with render=False so the Interface below can
    # place them itself; they must exist before it references them.
    inp = gr.Audio(sources=["upload"], type="filepath", label="Audio file", format="mp3", render=False)
    transcribe = gr.Textbox(label="Transcription", show_label=True, show_copy_button=True, render=False)
    ask_question = gr.Textbox(label="Ask a question", visible=False, render=False)
    submit_question = gr.Button("Submit question", visible=False, render=False)
    response_output = gr.Textbox(label="Response", visible=False, render=False)

    file_transcribe = gr.Interface(
        fn=audio_transcribe,
        inputs=inp,
        outputs=[transcribe, ask_question, submit_question, response_output],
        title="Chat with your Audio",
        description=(
            "Transcribe and chat with your audio inputs at the click of a button! This prototype uses the"
            f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
            " of arbitrary length."
        ),
        allow_flagging="never",
    )
    clear_button = gr.ClearButton([transcribe, response_output, inp, ask_question])

    def ask_question_callback(transcription, question):
        # Only query the LLM when the user actually typed a question.
        if question:
            return respond_to_question_llama(transcription, question)
        return "No question asked"

    submit_question.click(ask_question_callback, inputs=[transcribe, ask_question], outputs=[response_output])
    clear_button.click(hidden_ask_question, outputs=[ask_question, response_output, submit_question])

transcriberUI.queue().launch()
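
# Note: HUGGINGFACEHUB_API_TOKEN must be set as a Space secret for the Llama
# call, and requirements.txt is presumably expected to provide torch,
# transformers, spaces, and huggingface_hub (inferred from the imports above).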