fslm / model.py
NisargUpadhyay's picture
Upload 5 files
cca48c9 verified
raw
history blame contribute delete
810 Bytes
from transformers import AutoModelForCausalLM, AutoTokenizer
# Module-level singletons shared by setup() and generate().
# Both remain None until setup() assigns the loaded objects.
model = tokenizer = None
def setup():
    """Load the model and tokenizer into the module-level globals.

    Both are read from the local ``/data/Llama-3.2-1B-Instruct`` directory.
    The model is loaded with 8-bit quantization and automatic device
    placement. Calling this again reloads and rebinds both globals.
    """
    global model
    global tokenizer

    # Local import: BitsAndBytesConfig comes from the same package the file
    # already imports at the top, so no new dependency is introduced.
    from transformers import BitsAndBytesConfig

    # Single source of truth for the checkpoint location (Inference Endpoints
    # path), shared by the model and tokenizer loaders.
    model_path = "/data/Llama-3.2-1B-Instruct"

    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        # 8-bit quantization for efficiency. Passing ``load_in_8bit=True``
        # straight to ``from_pretrained`` is deprecated; the supported form
        # is an explicit quantization config.
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto",  # Let HF determine optimal device placement
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)
def generate(inputs):
    """Generate text from ``inputs`` with the globally loaded model.

    Args:
        inputs: Prompt text handed to the tokenizer.

    Returns:
        The first generated sequence decoded to a string with special
        tokens removed.

    Raises:
        RuntimeError: If ``setup()`` has not populated ``model`` and
            ``tokenizer`` yet.
    """
    # Fail with an actionable message instead of the opaque
    # "'NoneType' object is not callable" raised when setup() was skipped.
    if model is None or tokenizer is None:
        raise RuntimeError(
            "Model is not loaded; call setup() before generate()."
        )

    # Keep the whole encoding (not just input_ids) so the attention mask can
    # be forwarded; move it to the device the model lives on.
    encoded = tokenizer(inputs, return_tensors="pt").to(model.device)

    # Passing the attention mask explicitly avoids generate() having to
    # infer it (and the accompanying warning about unreliable results).
    output = model.generate(
        input_ids=encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
    )

    # Only the first sequence in the batch is decoded and returned.
    return tokenizer.decode(output[0], skip_special_tokens=True)