File size: 1,094 Bytes

885e5db
 
 
33c9fa3
885e5db
 
 
33c9fa3
885e5db
f78c246
 
fe67443
33c9fa3
885e5db
9e3efff
885e5db

from typing import  Dict, List, Any
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from peft import PeftModel
import os

class EndpointHandler():
    """Callable handler that serves text generation from a merged PEFT model.

    On construction the base causal-LM and its tokenizer are loaded, a PEFT
    adapter is loaded from ``path`` and merged into the base weights, and the
    result is wrapped in a ``text-generation`` pipeline stored on
    ``self.pipeline``.
    """

    # Base checkpoint the adapter is applied to; also used for the tokenizer.
    BASE_MODEL_ID = "meta-llama/Llama-3.1-8B-Instruct"

    def __init__(self, path=""):
        """Load the base model, merge the adapter found at ``path``, build the pipeline.

        Args:
            path: Location passed to ``PeftModel.from_pretrained`` as the
                adapter to load.
        """
        # Access token read from the environment; ``None`` when the variable
        # is unset, in which case it is passed through as ``token=None``.
        hf_token = os.getenv("HF_TOKEN")
        model = AutoModelForCausalLM.from_pretrained(
            self.BASE_MODEL_ID,
            torch_dtype=None,
            device_map="auto",
            token=hf_token
        )
        tokenizer = AutoTokenizer.from_pretrained(
            self.BASE_MODEL_ID,
            trust_remote_code=True,
            token=hf_token,
        )
        # Attach the adapter, then fold it into the base weights so the
        # pipeline receives the merged model rather than the PEFT wrapper.
        model = PeftModel.from_pretrained(model, path)
        model = model.merge_and_unload()
        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    def __call__(self, data: Any) -> Any:
        """Run the pipeline on a request payload and return its output.

        Args:
            data: Mapping that may contain an ``"inputs"`` entry (forwarded
                as the pipeline's positional argument) and a ``"parameters"``
                entry (a mapping expanded into keyword arguments). When
                ``"inputs"`` is absent, the payload itself, minus
                ``"parameters"``, is forwarded as the input.

        Returns:
            Whatever ``self.pipeline`` returns, unchanged.
        """
        # Work on a shallow copy so the caller's payload is not stripped of
        # its "inputs"/"parameters" keys as a side effect of handling it.
        payload = dict(data)
        inputs = payload.pop("inputs", payload)
        parameters = payload.pop("parameters", None)
        if parameters is None:
            return self.pipeline(inputs)
        return self.pipeline(inputs, **parameters)