import gradio as gr
from huggingface_hub import InferenceClient

# Serverless Inference API client for the Zephyr chat model
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")

def respond(message, history):
    response = ""
    # Start from a system prompt, replay the prior turns, then add the new user message
    messages = [{"role": "system", "content": "You are a friendly chatbot."}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    # Stream the completion and yield the growing response so the UI updates as tokens arrive
    stream = client.chat_completion(
        messages,
        max_tokens=100,
        temperature=1.2,
        stream=True,
    )
    for chunk in stream:
        token = chunk.choices[0].delta.content
        if token is not None:
            response += token
            yield response

chatbot = gr.ChatInterface(respond, type="messages")
chatbot.launch()
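Because the interface is created with type="messages", Gradio passes history as a list of OpenAI-style {"role": ..., "content": ...} dictionaries, which is why it can be extended directly into the messages list sent to chat_completion. Running the script locally assumes gradio and huggingface_hub are installed; the Inference API call may also require a Hugging Face token, for example via the HF_TOKEN environment variable or InferenceClient(..., token=...).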