Spaces:
Runtime error
Runtime error
File size: 2,648 Bytes
9f8f34e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
from peft import PeftModel
# Module-level setup: load the Taiyi causal language model and its tokenizer
# once at import time so that run() can reuse them for every request.
model_name = "DUTIRbionlp/Taiyi-LLM"
# trust_remote_code=True lets transformers execute the custom model code
# shipped in the model repository; weights are loaded as float16.
model = AutoModelForCausalLM.from_pretrained(
model_name,
trust_remote_code=True,
low_cpu_mem_usage=True,
torch_dtype=torch.float16
)
# Inference only: put the module in evaluation mode.
model.eval()
tokenizer = AutoTokenizer.from_pretrained(
model_name,
trust_remote_code=True
)
import logging
# Process-wide: suppress every log record of severity WARNING and below.
logging.disable(logging.WARNING)
# Use the tokenizer's end-of-document id as the pad, BOS and EOS id alike.
# NOTE(review): eod_id is presumably supplied by the repository's custom
# tokenizer class (loaded via trust_remote_code) - confirm.
tokenizer.pad_token_id = tokenizer.eod_id
tokenizer.bos_token_id = tokenizer.eod_id
tokenizer.eos_token_id = tokenizer.eod_id
# Start the conversation
# run() keeps only the last history_max_len token ids of the assembled prompt.
history_max_len = 1000
# Not referenced by any code visible in this section of the file.
utterance_id = 0
def run(message: str,
        history: list,
        max_new_tokens: int = 500,
        temperature: float = 0.10,
        top_p: float = 0.9,
        repetition_penalty: float = 1.0):
    """Generate the model's reply to ``message`` given the prior dialogue.

    The prompt is assembled as token ids in this layout::

        q1 <eos> r1 <eos> ... qN <eos> rN <eos> <bos> message <eos>

    and only the last ``history_max_len`` ids are fed to the model.

    Args:
        message: The new user utterance.
        history: Iterable of ``(question, response)`` string pairs from
            earlier turns, oldest first. May be empty.
        max_new_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature passed to ``model.generate``.
        top_p: Nucleus-sampling threshold passed to ``model.generate``.
        repetition_penalty: Repetition penalty passed to ``model.generate``.

    Returns:
        The decoded reply with the EOS text removed, surrounding whitespace
        stripped, and every newline doubled.
    """

    def _encode(text):
        # Tokenize without special tokens; result has shape (1, n). The cast
        # matters for empty text, where the tokenizer may yield a float tensor.
        ids = tokenizer(text, return_tensors="pt", add_special_tokens=False).input_ids
        return ids.to(torch.long)

    eos = torch.tensor([[tokenizer.eos_token_id]], dtype=torch.long)
    bos = torch.tensor([[tokenizer.bos_token_id]], dtype=torch.long)

    # Collect all (1, n) pieces and concatenate once. This avoids both the
    # float accumulator and the reshape(1, -1) on an empty tensor, which
    # raised RuntimeError whenever the history was empty.
    pieces = []
    for question, response in history:
        pieces.extend((_encode(question), eos, _encode(response), eos))
    pieces.extend((bos, _encode(message), eos))
    input_token_ids = torch.cat(pieces, dim=1)

    # Keep only the most recent tokens, and place them where the model lives.
    model_input_ids = input_token_ids[:, -history_max_len:].to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids=model_input_ids,
            max_new_tokens=max_new_tokens,
            do_sample=True,
            top_p=top_p,
            temperature=temperature,
            repetition_penalty=repetition_penalty,
            eos_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation; slice off the prompt.
    response_ids = outputs[:, model_input_ids.size(1):]
    text = tokenizer.batch_decode(response_ids)[0]

    # Drop the EOS text before stripping so whitespace next to it goes too.
    eos_text = tokenizer.eos_token
    if eos_text:
        text = text.replace(eos_text, "")
    return text.strip().replace("\n", "\n\n")
|