oleksandrfluxon committed on
Commit
377986e
1 Parent(s): 76a36d6

Update handler.py

Browse files
Files changed (1) hide show
  1. handler.py +2 -2
handler.py CHANGED
@@ -1,7 +1,7 @@
1
  import torch
2
 
3
  from typing import Any, Dict
4
- from transformers import AutoModelForCausalLM, AutoTokenizer
5
 
6
 
7
  class EndpointHandler:
@@ -9,7 +9,7 @@ class EndpointHandler:
9
  # load model and tokenizer from path
10
  self.tokenizer = AutoTokenizer.from_pretrained(path) # AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
11
 
12
- config = transformers.AutoConfig.from_pretrained(name, trust_remote_code=True)
13
  config.attn_config['attn_impl'] = 'triton'
14
  config.init_device = 'cuda:0' # For fast initialization directly on GPU!
15
  config.max_seq_len = 4096 # (input + output) tokens can now be up to 4096
 
1
  import torch
2
 
3
  from typing import Any, Dict
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
5
 
6
 
7
  class EndpointHandler:
 
9
  # load model and tokenizer from path
10
  self.tokenizer = AutoTokenizer.from_pretrained(path) # AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
11
 
12
+ config = AutoConfig.from_pretrained(name, trust_remote_code=True)
13
  config.attn_config['attn_impl'] = 'triton'
14
  config.init_device = 'cuda:0' # For fast initialization directly on GPU!
15
  config.max_seq_len = 4096 # (input + output) tokens can now be up to 4096