from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Module-level singletons, populated once by setup() and reused by generate().
model = None
tokenizer = None

def setup():
    global model
    global tokenizer

    # Passing load_in_8bit directly to from_pretrained is deprecated; the
    # current transformers API takes a BitsAndBytesConfig instead (8-bit
    # loading also requires the bitsandbytes package).
    model = AutoModelForCausalLM.from_pretrained(
        "/data/Llama-3.2-1B-Instruct",
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained("/data/Llama-3.2-1B-Instruct")

def generate(inputs):
    # Tokenize and move the tensors to the model's device; keeping the full
    # encoding (input_ids plus attention_mask) avoids the missing-attention-mask
    # warning from generate().
    encoded = tokenizer(inputs, return_tensors="pt").to(model.device)
    # Bound the completion length explicitly; without max_new_tokens, generate()
    # falls back to a short default max_length and truncates the output.
    output = model.generate(**encoded, max_new_tokens=256)
    decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)
    return decoded_output
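

# A minimal usage sketch, assuming the Llama-3.2-1B-Instruct checkpoint exists
# at /data/Llama-3.2-1B-Instruct and bitsandbytes is installed; the prompt
# below is only an illustrative example.
if __name__ == "__main__":
    setup()
    print(generate("Explain 8-bit quantization in one sentence."))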