import gradio as gr
from huggingface_hub import InferenceClient

# Shared inference client bound to the remote model server; `inference`
# below streams generated text through it.
client = InferenceClient(
    model="https://zmdcwinykvwef4-80.proxy.runpod.net",
)

def inference(message, history):
    """Stream the model's reply to ``message`` as a growing string.

    Sends ``message`` to the module-level ``client`` with streaming enabled
    (at most 1024 new tokens) and yields the text accumulated so far each
    time another chunk arrives, so the caller can render the reply
    incrementally.

    Args:
        message: The prompt text forwarded to the text-generation call.
        history: Accepted as the second callback argument but not used
            here; only ``message`` is sent to the model.

    Yields:
        The concatenation of all chunks received so far.
    """
    token_stream = client.text_generation(
        message,
        max_new_tokens=1024,
        stream=True,
    )
    reply_so_far = ""
    for chunk in token_stream:
        reply_so_far = reply_so_far + chunk
        yield reply_so_far

# Build the two customised widgets first (chat display, then input box),
# then assemble the chat UI around the streaming `inference` callback.
chat_display = gr.Chatbot(height=300)
question_box = gr.Textbox(
    placeholder="你可以问我任何关于巨杉数据库的问题!",
    container=False,
    scale=7,
)

demo = gr.ChatInterface(
    fn=inference,
    chatbot=chat_display,
    textbox=question_box,
    description="这是巨杉数据库旗下的AI智能助手由Llama2-13b为原型打造.",
    title="巨杉数据库AI智能助手",
    examples=["巨杉数据库有哪些优势?", "巨杉数据库有哪些功能"],
    # Localised labels for the built-in control buttons.
    retry_btn="重试",
    undo_btn="撤销",
    clear_btn="清除",
    submit_btn="提问",
)

# Enable the request queue, then start the web app.
demo.queue().launch()