"""
https://github.com/abetlen/llama-cpp-python/blob/main/examples/gradio_chat/local.py
https://github.com/awinml/llama-cpp-python-bindings
python convert_hf_to_gguf.py --outtype f16 Qwen1.5-0.5B-Chat
python convert_hf_to_gguf.py /workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/
./llama-cli -m /workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/Qwen1.5-0.5B-Chat-F16.gguf -p "I believe the meaning of life is" -n 128
./llama-cli -m /workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/Qwen1.5-0.5B-Chat-F16.gguf -f prompt.txt -n 128
./llama-cli -m /workspace/xusong/huggingface/models/Qwen1.5-0.5B-Chat/Qwen1.5-0.5B-Chat-F16.gguf -p "You are a helpful assistant" -cnv
"""
import json
from simulator import Simulator
import llama_cpp
import llama_cpp.llama_tokenizer  # explicit import of the submodule that provides LlamaHFTokenizer
from transformers import AutoTokenizer
from log_util import logger


class Qwen2Simulator(Simulator):

    def __init__(self, from_local=False):
        if from_local:
            self.hf_tokenizer = AutoTokenizer.from_pretrained("/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
            self.llm = llama_cpp.Llama(
                model_path="/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct-GGUF/qwen2-0_5b-instruct-fp16.gguf",
                tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                verbose=False,
            )
        else:
            self.hf_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
            self.llm = llama_cpp.Llama.from_pretrained(
                repo_id="Qwen/Qwen2-0.5B-Instruct-GGUF",
                filename="*fp16.gguf",
                tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                verbose=False,
            )
        logger.info(f"llm has been initialized: {self.llm}")
        # warmup
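        # (sketch, not in the original) a throwaway call here would load the weights
        # and fill the caches before the first real request, e.g.:
        #   self.llm("<|im_start|>system\n", max_tokens=1)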
    ### local
    def generate_query(self, messages):
        """Generate the next user query.

        :param messages: conversation history; the last message must not be a user turn
        :return: the generated user query text
        """
        assert messages[-1]["role"] != "user"
        logger.info(f"generating {json.dumps(messages)}")
        inputs = self.hf_tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=False,
        )
        inputs = inputs + "<|im_start|>user\n"
        return self._generate(inputs)
        # for new_text in self._stream_generate(input_ids):
        #     yield new_text
    def generate_response(self, messages):
        """Generate the assistant response; the last message must be a user turn."""
        assert messages[-1]["role"] == "user"
        logger.info(f"generating {json.dumps(messages)}")
        inputs = self.hf_tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        return self._generate(inputs)
        # for new_text in self._stream_generate(input_ids):
        #     yield new_text
    def _generate(self, inputs):
        """
        qwen2-0.5b-chat has a bug: sometimes a generated user turn does not end
        with <|im_end|>, for example:
        <|im_start|>system
        you are a helpful assistant<|im_end|>
        <|im_start|>user
        hi, what your name<|im_end|>
        <|im_start|>assistant
        My name is Jordan<|im_end|>
        <|im_start|>user      # everything above is the input, everything below is generated
        how old are you?
        <|im_start|>assistant
        I am a 41-year-old man.<|im_end|>
        """
        # stream=False
        output = self.llm(
            inputs,
            max_tokens=20,
            temperature=5,
            stop=["<|im_end|>", "<|im_start|>"]
        )
        output_text = output["choices"][0]["text"]
        return output_text
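
    # The commented-out calls in generate_query / generate_response reference a
    # _stream_generate helper that is not defined in this file. The sketch below is
    # an assumption, not the original implementation: it relies on llama-cpp-python's
    # stream=True mode, where each chunk carries the newly generated text in
    # chunk["choices"][0]["text"].
    def _stream_generate(self, inputs):
        for chunk in self.llm(
            inputs,
            max_tokens=20,
            temperature=5,
            stop=["<|im_end|>", "<|im_start|>"],
            stream=True,
        ):
            yield chunk["choices"][0]["text"]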


bot = Qwen2Simulator()

if __name__ == "__main__":
    # messages = [
    #     {"role": "system", "content": "you are a helpful assistant"},
    #     {"role": "user", "content": "What is the capital of France?"}
    # ]
    # output = bot.generate_response(messages)
    # print(output)
    messages = [
        {"role": "system", "content": "you are a helpful assistant"},
        {"role": "user", "content": "hi, what your name"},
        {"role": "assistant", "content": "My name is Jordan"}
    ]
    output = bot.generate_query(messages)
    print(output)