gsaivinay committed on
Commit
8324d4a
·
1 Parent(s): 40b0053

Create handler.py

Browse files
Files changed (1) hide show
  1. handler.py +30 -0
handler.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
from typing import Any, Dict, List

from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig
from transformers import AutoTokenizer, pipeline, logging
3
+
4
class EndpointHandler:
    """Callable request handler around a quantized text-generation pipeline.

    The tokenizer and model are loaded once in ``__init__``; every call then
    forwards the request payload to the stored pipeline.
    """

    def __init__(self, path: str = ""):
        """Load the tokenizer and quantized model and build the pipeline.

        Args:
            path: Location handed to both ``AutoTokenizer.from_pretrained``
                and ``AutoGPTQForCausalLM.from_quantized``.
        """
        tokenizer = AutoTokenizer.from_pretrained(path, use_fast=True)

        # Load the weights from the same location as the tokenizer. The
        # previous code passed an undefined ``model_name_or_path`` here,
        # which raised NameError as soon as the handler was constructed.
        model = AutoGPTQForCausalLM.from_quantized(
            path,
            use_safetensors=True,
            trust_remote_code=False,
            use_triton=False,
            quantize_config=None,
        )

        # Inference pipeline reused by every request.
        self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)

    def __call__(self, data: Dict[str, Any]) -> List[Any]:
        """Run the pipeline on one request payload.

        Args:
            data: Request dictionary. ``"inputs"`` is removed and used as the
                pipeline input; when the key is absent the dictionary itself
                is used. ``"parameters"``, if present, is removed and spread
                as keyword arguments to the pipeline. Note that ``data`` is
                mutated by these removals.

        Returns:
            Whatever the pipeline returns for the given input, unmodified.
        """
        inputs = data.pop("inputs", data)
        parameters = data.pop("parameters", None)

        # A missing or ``None`` parameters entry means "no extra kwargs".
        if parameters is None:
            parameters = {}

        return self.pipeline(inputs, **parameters)