|
import gradio as gr |
|
from huggingface_hub import InferenceClient |
|
|
|
# One shared inference client, created at import time and reused by every
# chat request handled in this module.
client = InferenceClient(model="HuggingFaceH4/zephyr-7b-beta")
|
|
|
def respond(message, history):
    """Stream the chatbot's reply to ``message`` as a growing string.

    Args:
        message: The newest user message.
        history: Earlier conversation turns, appended verbatim after the
            system prompt. May be empty or ``None``, in which case only the
            system prompt and ``message`` are sent. (Presumably a list of
            role/content dicts, since the interface is created with
            ``type="messages"`` -- confirm against the Gradio version in use.)

    Yields:
        The reply accumulated so far, re-yielded each time a new token
        arrives so the caller can render partial output.
    """
    messages = [{"role": "system", "content": "You are a friendly chatbot."}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    stream = client.chat_completion(
        messages,
        max_tokens=100,
        temperature=1.2,
        stream=True,
    )

    response = ""
    # The loop variable is named `chunk` so it does not rebind the `message`
    # parameter.
    for chunk in stream:
        # A chunk may carry no choices at all; skip it rather than fail with
        # IndexError in the middle of a reply.
        if not chunk.choices:
            continue

        token = chunk.choices[0].delta.content
        # Chunks without text (content is None) add nothing to the reply.
        if token is not None:
            response += token
            yield response
|
|
|
# Build the chat UI around `respond`; type="messages" selects the
# messages-style history format for the callback.
chatbot = gr.ChatInterface(fn=respond, type="messages")

# Start the Gradio app.
chatbot.launch()
|
|