Spaces:

wangzhang
/

chatSDB-test

Sleeping

File size: 4,186 Bytes

0fc60cb
 
 
e4aa56d
0fc60cb
 
 
e4aa56d
0fc60cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4aa56d
8f14c60
0fc60cb
 
 
 
 
614e7ac
 
0fc60cb
614e7ac
0fc60cb
 
 
 
 
 
 
614e7ac
 
0fc60cb
614e7ac
0fc60cb

from threading import Thread
from typing import Iterator

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = 4096

DESCRIPTION = """\
# ChatSDB

这是SequioaDB旗下的AI智能大语言模型，训练超过上万条真实数据和7亿参数。
ChatSDB是SequoiaDB旗下的AI智能大语言模型，训练超过上万条真实数据和7亿参数</h3>
<br><strong>模型🔗: <a>https://huggingface.co/wangzhang/ChatSDB </a></strong>
<br><strong>Dataset🔗: <a>https://huggingface.co/datasets/wangzhang/sdb </a></strong>
<br><strong> API Doc🔗: <a>https://zgg3nzdpswxy4a-80.proxy.runpod.net/docs/ <a> </strong>
"""

LICENSE = """ """

if not torch.cuda.is_available():
    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"


if torch.cuda.is_available():
    model_id = "wangzhang/ChatSDB-hf"
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.use_default_system_prompt = False


@spaces.GPU
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_prompt: str,
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.9,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[str]:
    conversation = []
    if system_prompt:
        conversation.append({"role": "system", "content": system_prompt})
    for user, assistant in chat_history:
        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
    conversation.append({"role": "user", "content": message})

    chat = tokenizer.apply_chat_template(conversation, tokenize=False)
    inputs = tokenizer(chat, return_tensors="pt", add_special_tokens=False).to("cuda")
    if len(inputs) > MAX_INPUT_TOKEN_LENGTH:
        inputs = inputs[-MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning("Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")

    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        num_beams=1,
        repetition_penalty=repetition_penalty,
    )
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)


chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        gr.Textbox(label="System prompt", lines=6),
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=MAX_MAX_NEW_TOKENS,
            step=1,
            value=DEFAULT_MAX_NEW_TOKENS,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.6,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.05,
            maximum=1.0,
            step=0.05,
            value=0.9,
        ),
        gr.Slider(
            label="Top-k",
            minimum=1,
            maximum=1000,
            step=1,
            value=50,
        ),
        gr.Slider(
            label="Repetition penalty",
            minimum=1.0,
            maximum=2.0,
            step=0.05,
            value=1.2,
        ),
    ],
    stop_btn=None,
    examples=[
        ["如何安装巨杉数据库SequioaDB？"],
        ["巨杉数据库SequioaDB有哪些优势？"],
        ["巨杉数据库SequioaDB是什么？"],
    ],
)

with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
    chat_interface.render()
    gr.Markdown(LICENSE)

if __name__ == "__main__":
    demo.queue(max_size=20).launch()