tyang816 committed on
Commit
f3d3769
·
verified ·
1 Parent(s): 37af75f

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +17 -3
handler.py CHANGED
@@ -9,10 +9,24 @@ dtype = torch.bfloat16 if torch.cuda.get_device_capability()[0] == 8 else torch.
9
  class EndpointHandler:
10
  def __init__(self, path=""):
11
  # load the model
12
- tokenizer = AutoTokenizer.from_pretrained(path, trust_remote_code=True)
13
- model = AutoModelForCausalLM.from_pretrained(path, device_map="auto", torch_dtype=dtype, trust_remote_code=True)
 
 
 
 
 
 
 
 
 
14
  # create inference pipeline
15
- self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
 
 
 
 
 
16
 
17
  def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
18
  inputs = data.pop("inputs", data)
 
9
  class EndpointHandler:
10
  def __init__(self, path=""):
11
  # load the model
12
+ tokenizer = AutoTokenizer.from_pretrained(
13
+ path,
14
+ trust_remote_code=True
15
+ )
16
+ model = AutoModelForCausalLM.from_pretrained(
17
+ path,
18
+ device_map="auto",
19
+ torch_dtype=dtype,
20
+ trust_remote_code=True,
21
+ revision="main"
22
+ )
23
  # create inference pipeline
24
+ self.pipeline = pipeline(
25
+ "text-generation",
26
+ model=model,
27
+ tokenizer=tokenizer,
28
+ trust_remote_code=True
29
+ )
30
 
31
  def __call__(self, data: Any) -> List[List[Dict[str, float]]]:
32
  inputs = data.pop("inputs", data)