from typing import Any, Dict, List

import logging

import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class EndpointHandler:
    def __init__(self, path: str = ""):
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        try:
            logger.info(f"Loading model and tokenizer from path: {path}")
            self.model = AutoModelForSeq2SeqLM.from_pretrained(path).to(self.device)
            self.tokenizer = AutoTokenizer.from_pretrained(path)
        except Exception as e:
            logger.error(f"Error loading model or tokenizer from path {path}: {e}")
            # Leave the handler in a recognizable failed state so __call__
            # can report the error instead of raising AttributeError.
            self.model = None
            self.tokenizer = None

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        if self.model is None or self.tokenizer is None:
            error_message = "Model or tokenizer not properly initialized"
            logger.error(error_message)
            return [{"error": error_message}]

        inputs = data.get("inputs")
        if not inputs:
            return [{"error": "No inputs provided"}]

        # Tokenize (truncating over-long inputs to the model's limit) and move
        # the input tensors to the same device as the model.
        tokenized_input = self.tokenizer(inputs, return_tensors="pt", truncation=True)
        input_ids = tokenized_input["input_ids"].to(self.device)
        attention_mask = tokenized_input["attention_mask"].to(self.device)

        summary_ids = self.model.generate(input_ids=input_ids, attention_mask=attention_mask)
        summary_text = self.tokenizer.decode(
            summary_ids[0], skip_special_tokens=True, clean_up_tokenization_spaces=True
        )
        return [{"summary": summary_text}]
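

# Usage sketch (not part of the deployed handler): on Hugging Face Inference
# Endpoints, the platform instantiates EndpointHandler with the model
# directory and calls it with the parsed request payload. The same contract
# can be exercised locally as below; the checkpoint name is only an
# illustrative assumption, substitute any seq2seq summarization model.
if __name__ == "__main__":
    handler = EndpointHandler(path="sshleifer/distilbart-cnn-12-6")  # hypothetical checkpoint
    result = handler({"inputs": "Long article text to summarize goes here."})
    print(result)  # e.g. [{"summary": "..."}]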