"""
https://github.com/abetlen/llama-cpp-python/blob/main/examples/gradio_chat/local.py
https://github.com/awinml/llama-cpp-python-bindings
"""
from simulator import Simulator
from llama_cpp import Llama
import llama_cpp.llama_tokenizer


class Qwen2Simulator(Simulator):
    def __init__(self, model_name_or_path=None):
        # NOTE: model_name_or_path is currently unused; the model and
        # tokenizer paths below are hardcoded.
        # Alternative: pull the GGUF file straight from the Hugging Face Hub:
        # self.llm = Llama.from_pretrained(
        #     repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
        #     filename="*q8_0.gguf",
        #     tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
        #         "Qwen/Qwen1.5-0.5B-Chat"
        #     ),
        #     verbose=False,
        # )
        tokenizer = llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
            "/workspace/czy/model_weights/Qwen1.5-0.5B-Chat/"
        )
        # LlamaHFTokenizer wraps a Hugging Face tokenizer as .hf_tokenizer;
        # keep a reference so apply_chat_template() works below.
        self.tokenizer = tokenizer.hf_tokenizer
        self.llm = Llama(
            model_path="Qwen/Qwen1.5-0.5B-Chat-GGUF/qwen1_5-0_5b-chat-q8_0.gguf",
            # n_gpu_layers=-1,  # Uncomment to use GPU acceleration
            # seed=1337,        # Uncomment to set a specific seed
            # n_ctx=2048,       # Uncomment to increase the context window
            tokenizer=tokenizer,
            verbose=False,
        )

    def generate_query(self, messages):
        """Generate the next *user* turn for a conversation that ends with
        a system or assistant message.

        :param messages: list of {"role": ..., "content": ...} dicts
        :return: the generated user query text
        """
        assert messages[-1]["role"] != "user"
        inputs = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False,
        )
        # Open a user turn so the model completes it as the next query.
        inputs = inputs + "<|im_start|>user\n"
        return self._generate(inputs)
        # Streaming variant (see _stream_generate below):
        # for new_text in self._stream_generate(inputs):
        #     yield new_text

    def generate_response(self, messages):
        """Generate the assistant reply for a conversation that ends with
        a user message."""
        assert messages[-1]["role"] == "user"
        inputs = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        return self._generate(inputs)
        # Streaming variant (see _stream_generate below):
        # for new_text in self._stream_generate(inputs):
        #     yield new_text

    def _generate(self, inputs):
        # Non-streaming completion; stop at the chat end-of-turn token.
        output = self.llm(
            inputs,
            max_tokens=20,
            temperature=0.7,
            stop=["<|im_end|>"],
        )
        return output["choices"][0]["text"]


bot = Qwen2Simulator(r"E:\data_model\Qwen2-0.5B-Instruct")

if __name__ == "__main__":
    # 1) Generate an assistant response to a conversation ending in a user turn.
    messages = [
        {"role": "system", "content": "you are a helpful assistant"},
        {"role": "user", "content": "What is the capital of France?"},
    ]
    output = bot.generate_response(messages)
    print(output)

    # 2) Generate the next user query after an assistant turn.
    messages = [
        {"role": "system", "content": "you are a helpful assistant"},
        {"role": "user", "content": "hi, what's your name"},
        {"role": "assistant", "content": "My name is Jordan"},
    ]
    output = bot.generate_query(messages)
    print(output)