Spaces:
Sleeping
Sleeping
File size: 3,150 Bytes
d51e19d 3721320 d51e19d 1b34aa5 d51e19d 3721320 ed73c38 1b34aa5 61b53d6 1b34aa5 3721320 1b34aa5 d51e19d 3721320 d51e19d 3721320 d51e19d 86b5bcb d51e19d 5ba294c d51e19d 61b53d6 d51e19d ed73c38 86b5bcb ed73c38 61b53d6 3721320 1b34aa5 ed73c38 86b5bcb 1b34aa5 ed73c38 3721320 61b53d6 d51e19d ed73c38 1b34aa5 d51e19d 61b53d6 d51e19d 61b53d6 6ee0077 61b53d6 d51e19d b4920d8 ed73c38 d51e19d c8b7fcf ed73c38 f76ff96 61b53d6 d51e19d 2b54ce7 61b53d6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
import torch
import spaces
import numpy as np
import gradio as gr
from gtts import gTTS
from transformers import pipeline
from huggingface_hub import InferenceClient
# Model names: Whisper for speech-to-text, Mistral-Instruct for the replies.
ASR_MODEL_NAME = "openai/whisper-small"
LLM_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"

# Initial system prompt, written as one completed [INST] ... [/INST] exchange
# so the model continues in the "Friday" persona.  The literal must start
# directly with "<s>": a stray leading double quote would be sent to the model
# as part of the prompt.
system_prompt = (
    "<s>[INST] You are Friday, a helpful and conversational AI assistant, "
    "and you respond with one to two sentences. [/INST] "
    "Hello there! I'm Friday, how can I help you?</s>"
)

# Global variables for history.
# instruct_history: the raw prompt sent to the LLM (grows with every turn).
# formatted_history: the human-readable transcript shown in the UI.
instruct_history = system_prompt
formatted_history = ""
# Remote text-generation client bound to the instruct LLM.
client = InferenceClient(model=LLM_MODEL_NAME)

# Run speech recognition on the first CUDA device when one is visible,
# otherwise fall back to the CPU.
device = "cpu" if not torch.cuda.is_available() else 0

# Local speech-to-text pipeline used to transcribe the microphone input.
pipe = pipeline("automatic-speech-recognition", model=ASR_MODEL_NAME, device=device)
def generate(instruct_history, temperature=0.1, max_new_tokens=128, top_p=0.95, repetition_penalty=1.0):
    """Ask the hosted LLM to continue the given prompt.

    Args:
        instruct_history: Full prompt text (prior turns plus the new
            instruction) passed to the text-generation endpoint.
        temperature: Sampling temperature; values below 0.01 are raised
            to 0.01.
        max_new_tokens: Upper bound on the number of generated tokens.
        top_p: Nucleus-sampling threshold.
        repetition_penalty: Penalty forwarded unchanged to the endpoint.

    Returns:
        Whatever ``client.text_generation`` returns for a non-streaming,
        no-details request that excludes the prompt from the output.
    """
    # Keep the temperature at or above a small positive floor.
    sampling_temperature = max(float(temperature), 1e-2)

    return client.text_generation(
        instruct_history,
        temperature=sampling_temperature,
        max_new_tokens=max_new_tokens,
        top_p=float(top_p),
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,  # fixed seed so sampling is repeatable
        stream=False,
        details=False,
        return_full_text=False,
    )
def _normalize_audio(samples):
    """Return *samples* as a mono float32 array scaled to a peak of 1.0.

    Multi-channel input (samples x channels) is averaged down to one channel.
    Silent or empty input is returned unscaled instead of being divided by a
    zero peak, which would fill the array with NaNs.
    """
    samples = np.asarray(samples)
    if samples.ndim > 1:
        samples = samples.mean(axis=1)
    samples = samples.astype(np.float32)
    peak = np.max(np.abs(samples)) if samples.size else 0.0
    if peak > 0:
        samples /= peak
    return samples


@spaces.GPU(duration=60)
def transcribe(audio, past_history):
    """Run one voice turn: speech -> text -> LLM reply -> speech.

    Args:
        audio: ``(sample_rate, samples)`` tuple from the microphone
            component, or ``None`` when nothing was recorded.
        past_history: Transcript text currently shown in the UI textbox.

    Returns:
        ``(audio_path, transcript)``: the path of the spoken reply (``None``
        when there is nothing to play) and the updated transcript text.

    Side effects:
        Updates the module-level ``instruct_history`` and
        ``formatted_history`` and overwrites ``response.mp3``.
    """
    global instruct_history, formatted_history

    # Nothing recorded: leave the conversation untouched instead of crashing
    # while unpacking ``None``.
    if audio is None:
        return None, past_history

    sr, y = audio
    y = _normalize_audio(y)
    if y.size == 0:
        return None, past_history

    # Transcribe user audio
    transcribed_user_audio = pipe({"sampling_rate": sr, "raw": y})["text"]

    # Append user input to history.  The textbox already holds the transcript
    # returned by the previous call, so rebuild from it rather than appending
    # it to the global again (which duplicated the whole history every turn).
    # NOTE(review): the label prefixes look like mis-encoded emoji; they are
    # kept as-is here - confirm the intended characters.
    formatted_history = past_history or ""
    formatted_history += f"π Human: {transcribed_user_audio}\n\n"
    instruct_history += f"<s>[INST] {transcribed_user_audio} [/INST] "

    # Generate LLM response
    llm_response = generate(instruct_history)

    # Append AI response to history
    instruct_history += f" {llm_response}</s>"
    formatted_history += f"π€ Friday: {llm_response}\n\n"

    # Convert AI response to audio; gTTS rejects empty text, so skip speech
    # synthesis when the model returned nothing to say.
    audio_path = None
    if llm_response.strip():
        gTTS(llm_response).save("response.mp3")
        audio_path = "response.mp3"

    print("Formatted History: ", formatted_history)

    # Return the full conversation history
    return audio_path, formatted_history
# Gradio UI: microphone input and spoken reply, a button that triggers one
# conversation turn, and a textbox showing the running transcript.
# NOTE(review): nesting is reconstructed; it assumes only the two audio
# widgets share the row - confirm against the intended layout.
with gr.Blocks() as demo:
    # The heading must be closed with </center>; an unclosed <center> leaves
    # the HTML fragment malformed.
    gr.HTML("<center><h1>Friday: AI Virtual Assistant π€</h1></center>")
    with gr.Row():
        audio_input = gr.Audio(label="Human", sources="microphone")
        output_audio = gr.Audio(label="Friday", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
    transcribe_btn = gr.Button("Transcribe")
    # Textbox to display the full conversation history
    transcription_box = gr.Textbox(label="Transcription", lines=10, placeholder="Conversation History...")
    # The textbox is both an input and an output, so each turn receives the
    # transcript shown so far and writes the extended transcript back.
    transcribe_btn.click(fn=transcribe, inputs=[audio_input, transcription_box], outputs=[output_audio, transcription_box])

if __name__ == "__main__":
    demo.queue()
    demo.launch()