#!python
# -*- coding: utf-8 -*-
# @author: Kun
"""Loading helpers for the baichuan-7B model/tokenizer, optionally with a LoRA adapter."""

from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

# Generation defaults for callers of this module.
max_token: int = 10000  # max new tokens to generate (earlier experiments used 64)
temperature: float = 0.75  # sampling temperature
top_p: float = 0.9  # nucleus-sampling cutoff
use_lora: bool = False  # module-level default; load_model() takes its own flag
def load_model(use_lora=True, LOAD_IN_8BIT=False):
    """Load the baichuan-7B tokenizer and causal-LM model.

    Typical configurations:
        use_lora=True,  LOAD_IN_8BIT=False  -> full-precision base + LoRA adapter
        use_lora=False, LOAD_IN_8BIT=True   -> 8-bit base model only

    Args:
        use_lora: if True, wrap the base model with the
            "hiyouga/baichuan-7b-sft" LoRA adapter via PEFT.
        LOAD_IN_8BIT: if True, load the weights in 8-bit to reduce
            GPU memory usage (name kept as-is for caller compatibility).

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # Single source of truth for the model id (was duplicated on two calls).
    model_name_or_path = "baichuan-inc/baichuan-7B"
    tokenizer = AutoTokenizer.from_pretrained(
        model_name_or_path,
        trust_remote_code=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        model_name_or_path,
        device_map="auto",
        trust_remote_code=True,
        load_in_8bit=LOAD_IN_8BIT,  # 8-bit quantization if GPU memory is tight
    )
    if use_lora:
        model = PeftModel.from_pretrained(model, "hiyouga/baichuan-7b-sft")
    return tokenizer, model