Spaces:

xu-song
/

self-chat

Running

xu song committed on Jul 29, 2024

Commit

10e2ac5

1 Parent(s): c38b609

update

Files changed (1) hide show

models/cpp_qwen2.py CHANGED Viewed

@@ -27,7 +27,8 @@ class Qwen2Simulator(Simulator):
     def __init__(self, from_local=False):
         if from_local:
-            self.hf_tokenizer = AutoTokenizer.from_pretrained("/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
             self.llm = llama_cpp.Llama(
                 model_path="/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct-GGUF/qwen2-0_5b-instruct-fp16.gguf",
                 tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
@@ -41,14 +42,11 @@ class Qwen2Simulator(Simulator):
                 tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                 verbose=False,
             )
-            logger.info(f"llm has been initialized: {self.llm}")
         # warmup
         ### local
     def generate_query(self, messages):
         """
         :param messages:
@@ -79,7 +77,6 @@ class Qwen2Simulator(Simulator):
         # for new_text in self._stream_generate(input_ids):
         #     yield new_text
     def _generate(self, inputs):
         """
         qwen2-0.5b-chat 有bug：有时user生成结束没有<|im_end|>，示例：
@@ -105,12 +102,9 @@ class Qwen2Simulator(Simulator):
         return output_text
 bot = Qwen2Simulator()
 if __name__ == "__main__":
     # messages = [
     #     {"role": "system", "content": "you are a helpful assistant"},
     #     {"role": "user", "content": "What is the capital of France?"}

     def __init__(self, from_local=False):
         if from_local:
+            self.hf_tokenizer = AutoTokenizer.from_pretrained(
+                "/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct/")
             self.llm = llama_cpp.Llama(
                 model_path="/workspace/xusong/huggingface/models/Qwen2-0.5B-Instruct-GGUF/qwen2-0_5b-instruct-fp16.gguf",
                 tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                 tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer(self.hf_tokenizer),
                 verbose=False,
             )
+        logger.info(f"llm has been initialized: {self.llm}")
         # warmup
         ### local
     def generate_query(self, messages):
         """
         :param messages:
         # for new_text in self._stream_generate(input_ids):
         #     yield new_text
     def _generate(self, inputs):
         """
         qwen2-0.5b-chat 有bug：有时user生成结束没有<|im_end|>，示例：
         return output_text
 bot = Qwen2Simulator()
 if __name__ == "__main__":
     # messages = [
     #     {"role": "system", "content": "you are a helpful assistant"},
     #     {"role": "user", "content": "What is the capital of France?"}