"""Streaming chat UI ("ChatSDB") backed by a remote text-generation endpoint.

A Gradio ChatInterface that forwards each user message to a Hugging Face
InferenceClient pointed at a RunPod-hosted model and streams the reply
token by token into the chat window.
"""
import gradio as gr
from huggingface_hub import InferenceClient

# Remote inference endpoint (RunPod proxy). The client treats this URL as
# the model, so every text_generation call is sent to this server.
client = InferenceClient(model="https://zgg3nzdpswxy4a-80.proxy.runpod.net/")


def inference(message, history):
    """Stream a model reply for *message*, yielding the growing partial text.

    Args:
        message: The latest user message (plain string).
        history: Prior (user, bot) turns supplied by gr.ChatInterface.
            NOTE(review): currently unused — the model sees only the latest
            message, so the bot has no conversational memory. Confirm whether
            history should be folded into the prompt.

    Yields:
        The accumulated response text after each streamed token, which
        ChatInterface renders as a live-updating message.
    """
    partial_message = ""
    # stream=True makes text_generation yield tokens as they are produced.
    for token in client.text_generation(message, max_new_tokens=128, stream=True):
        partial_message += token
        yield partial_message


if __name__ == "__main__":
    # Guarded so importing this module does not start a web server.
    gr.ChatInterface(
        inference,
        chatbot=gr.Chatbot(height=300),
        textbox=gr.Textbox(
            placeholder="Please ask your question here...",
            container=False,
            scale=7,
        ),
        description="This is a chatbot trained on the Llama2-13b model.",
        title="ChatSDB",
        # "SequioaDB" was a typo for the SequoiaDB database product.
        examples=[
            "What is SequoiaDB?",
            "What is SequoiaDB's license?",
            "What is SequoiaDB's official website?",
        ],
        retry_btn="Retry",
        undo_btn="Undo",
        clear_btn="Clear",
        submit_btn="Submit",
    ).queue().launch()