# models/deepseek_qwen.py
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


class DeepSeekQwenModel:
    """Thin wrapper around a locally cached DeepSeek-R1-Distill-Qwen checkpoint."""

    def __init__(self, model_name="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", device="cuda"):
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True,
            local_files_only=True
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
            local_files_only=True
        ).to(device)
        self.device = device

    def generate(self, prompt: str, max_new_tokens=1024) -> str:
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        # Generation parameters tuned for speed
        output = self.model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            pad_token_id=self.tokenizer.eos_token_id,
            use_cache=True,          # enable the KV cache to speed up decoding
            repetition_penalty=1.2   # discourage repetitive output
        )
        # Decode only the newly generated tokens. Slicing the decoded string by
        # len(prompt) is fragile, since tokenization does not round-trip the
        # prompt text exactly; slicing the token tensor by the input length does.
        new_tokens = output[0][inputs["input_ids"].shape[1]:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
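
# Minimal usage sketch (not part of the original module): shows how the wrapper
# might be invoked. Assumes the checkpoint is already in the local Hugging Face
# cache (required by local_files_only=True above) and a CUDA device is available;
# the prompt string is purely illustrative.
if __name__ == "__main__":
    model = DeepSeekQwenModel()
    answer = model.generate("Explain the difference between a list and a tuple in Python.")
    print(answer)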