# Captured from a Hugging Face Space (status at capture time: Sleeping).
import gradio as gr
import torch
from huggingface_hub import InferenceClient
from transformers import AutoProcessor
from transformers import BarkModel
# Bark text-to-speech model, loaded once at import time and shared by every
# request handled by this app.
model = BarkModel.from_pretrained("suno/bark-small")

# Use the first CUDA device when torch can see one; otherwise stay on the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"
model = model.to(device)

# Processor that turns text into the tensors `model.generate` consumes.
# NOTE(review): this is loaded from "suno/bark" while the weights above come
# from "suno/bark-small" — confirm the mismatch is intentional.
processor = AutoProcessor.from_pretrained("suno/bark")

# For more information on `huggingface_hub` Inference API support, please
# check the docs:
# https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
client = InferenceClient("meta-llama/Meta-Llama-3-8B-Instruct")
with gr.Blocks() as demo:
    # Components: chat transcript, audio output for the spoken reply,
    # text input with a submit button, and a button that resets the chat.
    chatbot = gr.Chatbot(type="messages")
    audio_box = gr.Audio(autoplay=True)
    msg = gr.Textbox(submit_btn=True)
    clear = gr.Button("Clear")

    def user(user_message, history: list):
        """Record the submitted text as a new user turn.

        Args:
            user_message: Text taken from the input textbox.
            history: Current chat history as a list of
                ``{"role": ..., "content": ...}`` dicts.

        Returns:
            A 2-tuple ``("", new_history)``: the empty string clears the
            textbox, and ``new_history`` is a new list (the input list is
            not mutated) with the user turn appended.
        """
        return "", history + [{"role": "user", "content": user_message}]

    def bot(history: list):
        """Stream the assistant's reply into ``history``.

        Appends an empty assistant turn to ``history`` and fills it in as
        chunks arrive from ``client.chat_completion``.

        Args:
            history: Chat history whose last entry is the pending user turn.

        Yields:
            The same ``history`` list after each received chunk, so the
            chatbot component can re-render the partial reply.
        """
        # Build the prompt BEFORE adding the placeholder so the model is not
        # sent a trailing, empty assistant turn. Only role/content are
        # forwarded, as a precaution against extra keys in the history dicts.
        prompt = [
            {"role": turn["role"], "content": turn["content"]}
            for turn in history
        ]
        history.append({"role": "assistant", "content": ""})
        for chunk in client.chat_completion(prompt, stream=True):
            if not chunk.choices:
                # Nothing to append from a chunk that carries no choices.
                continue
            token = chunk.choices[0].delta.content
            # `delta.content` can be None on a chunk; `str += None` would
            # raise TypeError and abort the stream.
            if token:
                history[-1]["content"] += token
            yield history

    def read(history: list):
        """Synthesize speech for the most recent message in ``history``.

        Args:
            history: Chat history; the ``content`` of its last entry is the
                text that gets spoken.

        Returns:
            ``(sampling_rate, waveform)`` where ``waveform`` is the squeezed
            NumPy array produced by the Bark model, or ``None`` when there
            is no text to speak.
        """
        if not history:
            return None
        text = history[-1]["content"]
        if not text or not text.strip():
            # Skip synthesis when the reply came back empty.
            return None
        inputs = processor(text=text, return_tensors="pt").to(device)
        speech = model.generate(**inputs)
        sampling_rate = model.generation_config.sample_rate
        return sampling_rate, speech.cpu().numpy().squeeze()

    # On submit: store the user turn, then stream the reply, then speak it.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    ).then(read, chatbot, audio_box)
    # "Clear" resets the chatbot component by sending it None.
    clear.click(lambda: None, None, chatbot, queue=False)
# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()