import gradio as gr
from huggingface_hub import InferenceClient

client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")


def respond(message, history):
    response = ""
    # Start from a system prompt, replay the prior turns, then append the new user message.
    messages = [{"role": "system", "content": "You are a friendly chatbot."}]
    if history:
        messages.extend(history)
    messages.append({"role": "user", "content": message})

    stream = client.chat_completion(
        messages,
        max_tokens=100,
        temperature=1.2,
        stream=True,
    )
    # Accumulate streamed tokens and yield the growing response so the UI updates live.
    for chunk in stream:
        token = chunk.choices[0].delta.content
        if token is not None:
            response += token
        yield response


chatbot = gr.ChatInterface(respond, type="messages")
chatbot.launch()