from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

# Global variables to cache the model, tokenizer, and generation pipeline
model = None
tokenizer = None
nlp = None


def init():
    """Load the model and tokenizer once and cache them globally."""
    global model, tokenizer, nlp
    model_name_or_path = "."
    model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)
    tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
    nlp = pipeline("text2text-generation", model=model, tokenizer=tokenizer)


def inference(payload):
    """Run text2text generation on the "inputs" field of the payload dict."""
    inputs = payload.get("inputs", "")
    if not inputs:
        return {"error": "No inputs provided"}
    # Run generation pipeline
    outputs = nlp(inputs, max_length=256)
    return outputs
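

# Usage sketch (assumption): a minimal local smoke test. In a hosted
# deployment the serving runtime would call init() and inference() itself;
# model_name_or_path = "." assumes the model files sit in the current
# working directory. The prompt below is purely illustrative.
if __name__ == "__main__":
    init()
    result = inference({"inputs": "translate English to German: Hello, world!"})
    print(result)  # e.g. [{"generated_text": "..."}]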