File size: 810 Bytes
cca48c9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 |
from transformers import AutoModelForCausalLM, AutoTokenizer
# Shared inference state: both names stay None until setup() assigns them.
model, tokenizer = None, None
def setup(model_path="/data/Llama-3.2-1B-Instruct"):
    """Load the causal LM and its tokenizer into the module-level globals.

    Rebinds the module globals ``model`` and ``tokenizer``; returns nothing.

    Args:
        model_path: Location handed to ``from_pretrained`` for both the model
            and the tokenizer, so the two can never point at different
            checkpoints. The default is the path this file was already using
            (noted in the original as the Inference Endpoints path).
    """
    global model, tokenizer

    # Imported here so this function is self-contained with respect to the
    # file-level import line, which only pulls in the two Auto* classes.
    from transformers import BitsAndBytesConfig

    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        # 8-bit quantization for efficiency. Passing ``load_in_8bit=True``
        # straight to ``from_pretrained`` is deprecated; the supported form is
        # an explicit quantization config.
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        # Let the library decide device placement.
        device_map="auto",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)
def generate(inputs, *, max_new_tokens=None):
    """Generate text for ``inputs`` with the globally loaded model.

    Args:
        inputs: Prompt text passed to the tokenizer.
        max_new_tokens: Optional cap on the number of newly generated tokens.
            When left as ``None`` nothing is passed, so the model's own
            generation settings decide the output length (the behaviour this
            function had before the parameter existed).

    Returns:
        The first sequence returned by ``model.generate``, decoded to a string
        with special tokens removed.
        NOTE(review): for decoder-only models this sequence presumably starts
        with the prompt tokens, so the prompt is echoed back - confirm whether
        callers rely on that.

    Raises:
        RuntimeError: If ``setup()`` has not populated ``model`` and
            ``tokenizer`` yet.
    """
    # Fail with an actionable message instead of "'NoneType' object is not
    # callable" when the globals are still unset.
    if model is None or tokenizer is None:
        raise RuntimeError("Model is not loaded; call setup() before generate().")

    # Move the whole encoding (ids and mask) to the device the model lives on.
    encoded = tokenizer(inputs, return_tensors="pt").to(model.device)

    generation_kwargs = {}
    if max_new_tokens is not None:
        generation_kwargs["max_new_tokens"] = max_new_tokens

    output = model.generate(
        input_ids=encoded["input_ids"],
        # Forward the mask explicitly so generate() does not have to infer it.
        attention_mask=encoded["attention_mask"],
        **generation_kwargs,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)