# models/qwen2_5_math.py
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


class QwenMathModel:
    def __init__(self, model_name="Qwen/Qwen2.5-Math-1.5B", device="cuda"):
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True,
            # token=token
        )
        # Load weights in fp16 to halve memory use, then move to the target device.
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            # token=token,
            torch_dtype=torch.float16
        ).to(device)
        self.device = device

    def generate(self, prompt: str, max_new_tokens=1024) -> str:
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        # Tuned generation parameters
        output = self.model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            pad_token_id=self.tokenizer.eos_token_id,
            use_cache=True
        )
        # Decode only the newly generated tokens. Slicing the decoded string by
        # len(prompt) is fragile (decoding may not reproduce the prompt verbatim),
        # so slice the output token ids past the prompt length instead.
        new_tokens = output[0][inputs["input_ids"].shape[1]:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
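
# A minimal usage sketch, assuming a CUDA-capable GPU is available and the
# model weights can be downloaded. The prompt below is illustrative plain
# text, not Qwen's official chat template.
if __name__ == "__main__":
    qwen = QwenMathModel()  # downloads Qwen/Qwen2.5-Math-1.5B on first run
    answer = qwen.generate(
        "Solve step by step: what is the sum of the first 100 positive integers?",
        max_new_tokens=256,
    )
    print(answer)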