File size: 1,622 Bytes
1f091e0 9780dc1 1f091e0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
from typing import Dict, List, Any
import torch
from transformers import pipeline
class EndpointHandler:
    """Request handler that wraps a ``text-generation`` pipeline.

    The pipeline is created once in ``__init__`` and reused by every
    ``__call__``.
    """

    def __init__(self, path=""):
        """Create the text-generation pipeline and store it on the instance.

        Args:
            path: Not used by this handler; the model id is hard-coded below.
        """
        self.pipeline = pipeline(
            task="text-generation",
            model="mistralai/Mixtral-8x7B-Instruct-v0.1",
            device_map="auto",
            # trust_remote_code=True,
            # NOTE(review): load_in_4bit presumably requires a quantization
            # backend (e.g. bitsandbytes) in the runtime image -- confirm.
            model_kwargs={
                "load_in_4bit": True,
            },
            # batch_size=1,
        )
        # model.generation_config

    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Run the pipeline on one request payload and return its output.

        Args:
            data: Request payload. Keys read by this method:

                * ``inputs`` (``str``): the prompt; ``""`` when the key is
                  absent.
                * ``parameters`` (``dict``, optional): keyword arguments
                  forwarded unchanged to the pipeline call. A missing key,
                  ``None`` or an empty mapping means "no extra arguments".

                The payload is only read, never modified.

        Returns:
            The pipeline's result, unmodified; it will be serialized and
            returned to the client. According to the text-generation
            pipeline documentation this is a list (or a list of lists) of
            dicts, each holding exactly one of:

            * ``generated_text`` (``str``, present when
              ``return_text=True``) -- the generated text.
            * ``generated_token_ids`` (``torch.Tensor`` or ``tf.Tensor``,
              present when ``return_tensors=True``) -- the token ids of the
              generated text.
        """
        # Read with .get rather than .pop so the caller's dict is left intact.
        inputs = data.get("inputs", "")
        # Any falsy value (missing key, None, {}) collapses to "no kwargs".
        params = data.get("parameters") or {}

        # ``params`` are generation kwargs; see
        # https://huggingface.co/docs/transformers/generation_strategies#customize-text-generation
        return self.pipeline(inputs, **params)