xiaoxishui committed
Commit 1559fe0 · 1 Parent(s): 728a621

Update transformers to 4.39.3 and optimize model loading

Files changed (2):
  1. app.py +9 -7
  2. requirements.txt +1 -1
app.py CHANGED
@@ -36,6 +36,7 @@ model_name_or_path = "xiaoxishui/internlm2_5-7b-chat"
 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 print(f"Using device: {device}")
 
+
 @dataclass
 class GenerationConfig:
     # this config is used for chat to provide more diversity
@@ -187,7 +188,10 @@ def on_btn_click():
 def load_model():
     model = (AutoModelForCausalLM.from_pretrained(
         model_name_or_path,
-        trust_remote_code=True).to(torch.bfloat16).cuda())
+        trust_remote_code=True,
+        use_cache=False,  # disable the KV cache
+        torch_dtype=torch.bfloat16,
+        device_map="auto")).cuda()
     tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,
                                               trust_remote_code=True)
     return model, tokenizer
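Note on the new loading path: with device_map="auto", accelerate already places the weights across the available devices, so the trailing .cuda() is redundant on a single-GPU machine and can conflict with a sharded placement; use_cache=False disables the KV cache, which saves memory at the cost of slower autoregressive generation. A minimal standalone sketch of this loading step, assuming a single visible GPU and omitting the redundant .cuda():

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name_or_path = "xiaoxishui/internlm2_5-7b-chat"

model = AutoModelForCausalLM.from_pretrained(
    model_name_or_path,
    trust_remote_code=True,
    use_cache=False,             # disable the KV cache (memory over speed)
    torch_dtype=torch.bfloat16,
    device_map="auto",           # accelerate handles placement; no .cuda() call
)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,
                                          trust_remote_code=True)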
@@ -210,17 +214,16 @@ def prepare_generation_config():
     return generation_config
 
 
-user_prompt = '<|im_start|>user\n{user}<|im_end|>\n'
-robot_prompt = '<|im_start|>assistant\n{robot}<|im_end|>\n'
-cur_query_prompt = '<|im_start|>user\n{user}<|im_end|>\n\
-    <|im_start|>assistant\n'
+user_prompt = '👥\n{user}\n'
+robot_prompt = '🤖\n{robot}\n'
+cur_query_prompt = '👥\n{user}\n'
 
 
 def combine_history(prompt):
     messages = st.session_state.messages
     meta_instruction = ('You are a helpful, honest, '
                        'and harmless AI assistant.')
-    total_prompt = f'<s><|im_start|>system\n{meta_instruction}<|im_end|>\n'
+    total_prompt = f'<s>🤖\n{meta_instruction}\n'
     for message in messages:
         cur_content = message['content']
         if message['role'] == 'user':
@@ -293,4 +296,3 @@ def main():
 
 if __name__ == '__main__':
     main()
-
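The new templates replace InternLM2's ChatML tags (<|im_start|>, <|im_end|>) with plain emoji markers, and the new cur_query_prompt no longer ends with an assistant cue for the reply, so the model receives no explicit turn marker for its answer. A sketch of how the history is presumably assembled under the new templates; only the hunk's context lines are shown above, so the loop body and the messages parameter (the app reads st.session_state.messages) are assumptions:

user_prompt = '👥\n{user}\n'
robot_prompt = '🤖\n{robot}\n'
cur_query_prompt = '👥\n{user}\n'

def combine_history(prompt, messages):
    # messages: list of {'role': ..., 'content': ...} dicts (assumed shape)
    meta_instruction = ('You are a helpful, honest, '
                        'and harmless AI assistant.')
    total_prompt = f'<s>🤖\n{meta_instruction}\n'
    for message in messages:
        cur_content = message['content']
        if message['role'] == 'user':
            total_prompt += user_prompt.format(user=cur_content)
        else:  # assumed: anything else is an assistant turn
            total_prompt += robot_prompt.format(robot=cur_content)
    total_prompt += cur_query_prompt.format(user=prompt)
    return total_prompt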
 
 
requirements.txt CHANGED
@@ -1,5 +1,5 @@
 streamlit>=1.8.0
-transformers==4.36.0
+transformers==4.39.3
 torch>=2.0.0
 accelerate>=0.20.0
 sentencepiece
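Because the app pins transformers exactly while leaving the other dependencies as ranges, a startup check along these lines (a hypothetical addition, not part of this commit) can fail fast in a mismatched environment:

from importlib.metadata import version  # stdlib since Python 3.8

expected = "4.39.3"   # the pin from requirements.txt
installed = version("transformers")
if installed != expected:
    raise RuntimeError(
        f"app.py was tested against transformers=={expected}, "
        f"found {installed}")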