import torch
import spaces
import numpy as np
import gradio as gr
from gtts import gTTS
from transformers import pipeline
from huggingface_hub import InferenceClient
# Whisper for speech-to-text, Mistral (via the Inference API) for the reply
ASR_MODEL_NAME = "openai/whisper-small"
NLP_MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"

system_prompt = """<s> [INST] You are Friday, a helpful and conversational assistant. [/INST]"""

client = InferenceClient(NLP_MODEL_NAME)

# Run Whisper on the GPU if one is available, otherwise fall back to CPU
device = 0 if torch.cuda.is_available() else "cpu"
pipe = pipeline(
    task="automatic-speech-recognition",
    model=ASR_MODEL_NAME,
    device=device,
)
def generate(prompt, temperature=0.1, max_new_tokens=64, top_p=0.95, repetition_penalty=1.0):
    """Send the transcribed prompt to Mistral and return the generated reply."""
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )

    # e.g. for prompt="Hello" this yields:
    # "<s> [INST] You are Friday, a helpful and conversational assistant. [/INST] Hello </s>"
    formatted_prompt = system_prompt + f""" {prompt} </s>"""

    output = client.text_generation(
        formatted_prompt, **generate_kwargs, stream=False, details=False, return_full_text=False)
    print(output)
    return output
def transcribe(audio):
    """Transcribe microphone audio, generate a reply, and return it as speech."""
    sr, y = audio
    # Gradio delivers (sample_rate, int16 samples); convert to float32 and normalize to [-1, 1]
    y = y.astype(np.float32)
    y /= np.max(np.abs(y))

    inputs = pipe({"sampling_rate": sr, "raw": y})["text"]
    print("User transcription: ", inputs)

    response = generate(inputs)

    # Synthesize the reply with gTTS and hand the audio file path back to Gradio
    audio_response = gTTS(response)
    audio_response.save("response.mp3")
    print(audio_response)
    return "response.mp3"
with gr.Blocks() as demo:
    gr.HTML("<center><h1>Friday: AI Virtual Assistant</h1></center>")
    with gr.Row():
        audio_input = gr.Audio(label="Human", sources=["microphone"])
        output_audio = gr.Audio(label="Friday", type="filepath",
                                interactive=False,
                                autoplay=True,
                                elem_classes="audio")
    transcribe_btn = gr.Button("Transcribe")
    transcribe_btn.click(fn=transcribe, inputs=audio_input,
                         outputs=output_audio)

demo.queue()
demo.launch()
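For reference, a requirements.txt along these lines should cover the imports used above. This is a sketch derived from the import list; the Space's actual dependency file and pinned versions are not shown here.

gradio
transformers
torch
numpy
gtts
huggingface_hub
spaces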