|
import json
import os

# `spaces` stays first among the third-party imports, ahead of the
# CUDA-using libraries below (vllm / transformers), as in the original order.
import spaces
import gradio as gr
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
|
|
|
|
|
@spaces.GPU()
def predict(message, history, system_prompt, temperature, max_tokens):
    """Generate a chat reply and yield it as progressively longer prefixes.

    The conversation (system prompt, earlier turns, new user message) is
    rendered with the module-level ``tokenizer`` chat template and run through
    the module-level ``llm``. The completion is fully generated before the
    first value is yielded; the prefix loop only gives the UI a typing effect.

    Args:
        message: The new user message.
        history: Iterable of ``(user, assistant)`` pairs from earlier turns.
            Entries that are ``None`` are skipped.
        system_prompt: Content of the leading system message.
        temperature: Sampling temperature passed to ``SamplingParams``.
        max_tokens: Upper bound on generated tokens. Coerced to ``int``
            because UI sliders may deliver the value as a float.

    Yields:
        Successively longer prefixes of the generated text, ending with the
        full text. Nothing is yielded when the generated text is empty.
    """
    messages = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        # Skip missing turns so a literal None never ends up in the prompt.
        if human is not None:
            messages.append({"role": "user", "content": human})
        if assistant is not None:
            messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    stop_tokens = ["<|im_end|>", "<|endoftext|>", "<|im_start|>"]
    sampling_params = SamplingParams(
        temperature=temperature,
        top_p=1,
        max_tokens=int(max_tokens),
        stop=stop_tokens,
    )

    completions = llm.generate(prompt, sampling_params)
    for output in completions:
        # Log the prompt as echoed back by the engine, without rebinding `prompt`.
        print('==========================question=============================')
        print(output.prompt)

        generated_text = output.outputs[0].text
        print('===========================answer=============================')
        print(generated_text)

        # Simulated streaming: emit the text one character longer each step.
        for end in range(1, len(generated_text) + 1):
            yield generated_text[:end]
|
|
|
|
|
if __name__ == "__main__":
    path = "stabilityai/stablelm-2-12b-chat"

    # `tokenizer` and `llm` are intentionally module-level names:
    # predict() reads them as globals, so do not move them into a function.
    tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
    llm = LLM(model=path, tensor_parallel_size=1, trust_remote_code=True)

    demo = gr.ChatInterface(
        predict,
        title="LLM playground",
        description="This is a LLM playground for StableLM",
        theme="soft",
        chatbot=gr.Chatbot(height=1400, label="Chat History"),
        textbox=gr.Textbox(placeholder="input", container=False, scale=7),
        retry_btn=None,
        undo_btn="Delete Previous",
        clear_btn="Clear",
        # Order matters: these map positionally onto predict()'s
        # system_prompt, temperature and max_tokens parameters.
        additional_inputs=[
            gr.Textbox("You are a helpful assistant.", label="System Prompt"),
            gr.Slider(0, 1, 0.7, label="Temperature"),
            gr.Slider(100, 2048, 1024, label="Max Tokens"),
        ],
        additional_inputs_accordion_name="Parameters",
        examples=[
            ["implement snake game using pygame"],
            ["Can you explain briefly to me what is the Python programming language?"],
            ["write a program to find the factorial of a number"],
        ],
    )

    # queue() lets the generator-based predict() push incremental updates.
    demo.queue().launch()