File size: 1,323 Bytes
63c6bf0
 
 
 
 
 
 
 
 
 
 
 
4125e43
63c6bf0
 
 
 
 
4125e43
 
63c6bf0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# models/deepseek_qwen.py

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

class DeepSeekQwenModel:
    """Wrapper around a locally cached causal language model and its tokenizer.

    Both the tokenizer and the model are loaded with ``local_files_only=True``,
    so the checkpoint must already be available locally; nothing is downloaded.
    """

    def __init__(self, model_name="deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", device="cuda"):
        """Load the tokenizer and the model, then move the model to *device*.

        Args:
            model_name: Model identifier or local path handed to
                ``from_pretrained`` for both the tokenizer and the model.
            device: Device the model is moved to; tokenized prompts are sent
                to the same device in :meth:`generate`.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(
            model_name,
            trust_remote_code=True,
            local_files_only=True,
        )

        self.model = AutoModelForCausalLM.from_pretrained(
            model_name,
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
            local_files_only=True,
        ).to(device)
        self.device = device

    def generate(self, prompt: str, max_new_tokens=1024) -> str:
        """Sample a completion for *prompt* and return only the new text.

        Args:
            prompt: Text fed to the model.
            max_new_tokens: Upper bound on the number of generated tokens.

        Returns:
            The decoded continuation with special tokens removed and
            surrounding whitespace stripped; the prompt is not included.
        """
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.device)
        # Remember how many tokens belong to the prompt so the continuation
        # can be separated at token level after generation.
        prompt_len = inputs["input_ids"].shape[-1]

        # Generation settings tuned for speed.
        output = self.model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            temperature=0.7,
            pad_token_id=self.tokenizer.eos_token_id,
            use_cache=True,           # reuse the key/value cache between steps
            repetition_penalty=1.2,   # discourage repeated output
        )

        # Slice by token count rather than by len(prompt): decoding with
        # skip_special_tokens=True does not necessarily reproduce the prompt
        # text exactly (special tokens are dropped, whitespace may change),
        # so a character offset can cut into the answer or leave prompt text.
        new_tokens = output[0][prompt_len:]
        return self.tokenizer.decode(new_tokens, skip_special_tokens=True).strip()