fwittel philschmid HF staff committed on
Commit
9bd5173
·
1 Parent(s): 0af1b4a

Update handler.py (#1)

Browse files

- Update handler.py (e52e5bbe1f709006910d411d04e2b84270ce3224)


Co-authored-by: Philipp Schmid <[email protected]>

Files changed (1) hide show
  1. handler.py +2 -2
handler.py CHANGED
@@ -1,7 +1,7 @@
1
  import torch
2
  from typing import Dict, List, Any
3
  # from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
5
 
6
  # check for GPU
7
  device = 0 if torch.cuda.is_available() else -1
@@ -13,7 +13,7 @@ class EndpointHandler:
13
  tokenizer = AutoTokenizer.from_pretrained(path)
14
  # model = AutoModel.from_pretrained(path, low_cpu_mem_usage=True)
15
  # model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True)
16
- model = AutoModelForSeq2SeqLM.from_pretrained(path, low_cpu_mem_usage=True)
17
  # create inference pipeline
18
  self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
19
 
 
1
  import torch
2
  from typing import Dict, List, Any
3
  # from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
4
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
5
 
6
  # check for GPU
7
  device = 0 if torch.cuda.is_available() else -1
 
13
  tokenizer = AutoTokenizer.from_pretrained(path)
14
  # model = AutoModel.from_pretrained(path, low_cpu_mem_usage=True)
15
  # model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True)
16
+ model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True)
17
  # create inference pipeline
18
  self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer, device=device)
19