# models/deepseek_qwen.py
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch


class DeepSeekQwenModel:
    def __init__(self, model_name="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", device="cuda"):
        # Load tokenizer and model from the local cache only (no network
        # access), which suits offline HPC nodes.
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True,
            local_files_only=True,
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
            local_files_only=True,
        ).to(device)
        self.device = device

    def generate(self, prompt: str, max_new_tokens=1024) -> str:
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        # Generation settings tuned for speed
        output = self.model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            pad_token_id=self.tokenizer.eos_token_id,
            use_cache=True,           # reuse the KV cache to speed up decoding
            repetition_penalty=1.2,   # discourage repetitive generation
        )
        # Decode only the newly generated tokens; slicing the decoded string by
        # len(prompt) is fragile because detokenization may not reproduce the
        # prompt verbatim.
        prompt_len = inputs["input_ids"].shape[1]
        return self.tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True).strip()
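

# Minimal usage sketch (an illustration, not part of the original module):
# instantiating the wrapper and generating one completion. Assumes a CUDA
# device is available and the model weights are already in the local
# Hugging Face cache, since loading uses local_files_only=True.
if __name__ == "__main__":
    model = DeepSeekQwenModel()
    answer = model.generate("What is 17 * 24? Think step by step.")
    print(answer)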