import logging

import torch
from peft import PeftModel  # only needed if a PEFT/LoRA adapter is loaded on top of the base model
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the Taiyi-LLM checkpoint and its tokenizer in half precision.
model_name = "DUTIRbionlp/Taiyi-LLM"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float16
)
model.eval()

tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True
)


# Silence library warnings and map pad/bos/eos to the Qwen-style
# end-of-document token id used by this tokenizer.
logging.disable(logging.WARNING)
tokenizer.pad_token_id = tokenizer.eod_id
tokenizer.bos_token_id = tokenizer.eod_id
tokenizer.eos_token_id = tokenizer.eod_id



# Start the conversation
history_max_len = 1000  # maximum number of prompt tokens kept from the running history
utterance_id = 0


def run(message: str,
        history: list,
        max_new_tokens: int = 500,
        temperature: float = 0.10,
        top_p: float = 0.9,
        repetition_penalty: float = 1.0):
    """Generate a reply to `message`, conditioning on `history`,
    a list of (question, response) string pairs."""

    bos_token_id = torch.tensor([[tokenizer.bos_token_id]], dtype=torch.long)
    eos_token_id = torch.tensor([[tokenizer.eos_token_id]], dtype=torch.long)

    # Re-encode the conversation history as
    # <question><eos><response><eos> ... token ids.
    history_chunks = []
    for question, response in history:
        question_ids = tokenizer(question, return_tensors="pt",
                                 add_special_tokens=False).input_ids
        response_ids = tokenizer(response, return_tensors="pt",
                                 add_special_tokens=False).input_ids
        history_chunks.append(
            torch.concat((question_ids, eos_token_id, response_ids, eos_token_id), dim=1)
        )
    if history_chunks:
        history_token_ids = torch.concat(history_chunks, dim=1)
    else:
        history_token_ids = torch.empty((1, 0), dtype=torch.long)

    # Encode the current user message as <bos><message><eos>.
    input_ids = tokenizer(message, return_tensors="pt",
                          add_special_tokens=False).input_ids
    user_input_ids = torch.concat([bos_token_id, input_ids, eos_token_id], dim=1)

    # Keep only the most recent history_max_len tokens as the prompt and move
    # it to the model's device (a no-op when the model stays on CPU).
    input_token_ids = torch.concat((history_token_ids, user_input_ids), dim=1)
    model_input_ids = input_token_ids[:, -history_max_len:].to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=model_input_ids, max_new_tokens=max_new_tokens, do_sample=True,
            top_p=top_p, temperature=temperature, repetition_penalty=repetition_penalty,
            eos_token_id=tokenizer.eos_token_id
        )

    # Strip the prompt tokens and decode only the newly generated reply.
    model_input_ids_len = model_input_ids.size(1)
    response_ids = outputs[:, model_input_ids_len:]
    response = tokenizer.batch_decode(response_ids)
    return response[0].strip().replace(tokenizer.eos_token, "").replace("\n", "\n\n")
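

# A minimal, hypothetical usage sketch: it shows how `run` can be chained for
# multi-turn chat by feeding back (question, answer) pairs. The questions below
# are illustrative placeholders, not part of the original script.
if __name__ == "__main__":
    chat_history = []
    question = "What are the common symptoms of influenza?"
    answer = run(question, chat_history)
    print(answer)

    # Append the finished turn so the next call is conditioned on it.
    chat_history.append((question, answer))
    follow_up = run("How does it differ from a common cold?", chat_history)
    print(follow_up)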